Contents

1 Introduction

This package contains the data of bioenergetic features of 152 CLL samples and 9 B cell samples, measured by Seahorse extracellular flux assay. This package also reproduces figures presented in the paper “Energy metabolism influences drug sensitivity and is co-determined by genetic variants in CLL” by Lu J, Böttcher M et al.


In this vignette we present the integrative analysis of CLL bioenergetic data set and the source code for the paper


This vignette was put together by Junyan Lu


This vignette is build from the sub-vignettes, which each can be build separatelly. The parts are separated by the horizontal lines. Each part finishes with removal of all the created objects.

library(seahorseCLL)
library(BloodCancerMultiOmics2017)
library(SummarizedExperiment)
library(ggbeeswarm)
library(xtable)
library(cowplot)
library(piano)
library(gridExtra)
library(grid)
library(genefilter)
library(pheatmap)
library(DESeq2)
library(survival)
library(maxstat)
library(survminer)
library(glmnet)
library(RColorBrewer)
library(reshape2)
library(gtable)
library(tidyverse)

2 Comparison between normal B cell and CLL cells

2.1 A global view of bioenergetic features in CLL and normal B-cells

Load data

data("seaBcell", "seaOri", "patmeta")

Combine the two data sets to one matrix

stopifnot(rownames(seaBcell) == rownames(seaOri))
seaOri$diagnosis <- patmeta[colnames(seaOri),]$Diagnosis
seaOri <- seaOri[,seaOri$diagnosis == "CLL"] # choose CLL samples

seaMat <- cbind(assays(seaBcell)$seaMedian, assays(seaOri)$seaMedian)
seaBatch <- rbind(colData(seaBcell)[,"dateMST", drop= FALSE], colData(seaOri)[,"dateMST", drop = FALSE])

#remove samples that contain NA values
seaMat <- seaMat[,complete.cases(t(seaMat))]

Principal component analysis

resPC <- prcomp(t(seaMat), center = TRUE, scale. = TRUE)
varExp <- resPC$sdev^2/sum(resPC$sdev^2)

Plot the first two principal components

#define color
colorList <- c(`B cell` = "#FF3030", CLL= "#1E90FF")

plotTab <- data.frame(resPC$x[,c(1,2)])
plotTab$type <- c(rep("B cell", ncol(seaBcell)), rep("CLL", nrow(plotTab)- ncol(seaBcell))) #10 normal b cell samples

pcaPlot <- ggplot(plotTab, aes(x=PC1, y=PC2, color = type)) + geom_point(size=3) + 
  xlab(sprintf("PC1 (%2.1f%s)",varExp[1]*100,"%")) + ylab(sprintf("PC2 (%2.1f%s)", varExp[2]*100, "%")) + 
  theme_bw() + theme(legend.position = c(0.9,0.9), legend.title = element_blank(), 
                     legend.background = element_rect(color="grey"),
                     axis.text = element_text(size =18),
                     axis.title = element_text(size = 20)) +
  scale_color_manual(values = colorList) + coord_cartesian(xlim = c(-5.5,5.5), ylim = c(-5.5,5.5))
pcaPlot

2.2 Associations between cell type and individual metabolic features

Prepare table for hypothesis test

seaMat <- data.frame(cbind(assays(seaBcell)$seaMedian, assays(seaOri)$seaMedian)) %>% rownames_to_column(var = "measure")
seaTab <- gather(seaMat, key = "patientID", value = "value", -measure) %>% 
  mutate(type = ifelse(substr(patientID, 1, 1) == "K", "B cell", "CLL")) %>% mutate(type = factor(type)) %>%
  mutate(batch = as.factor(seaBatch[patientID, ]))

t-test for each measurment

pTab <- group_by(seaTab, measure) %>% do((function(x) {
  res <- t.test(value ~ type, x, equal.var = TRUE)
  data.frame(p = res$p.value,
             diff = res$estimate[[2]] - res$estimate[[1]])
}) (.))
pTab$p.adj <- p.adjust(pTab$p, method = "BH")

ANOVA-test for each measurment (accounting for batch effect)

pTab.aov <- group_by(seaTab, measure) %>% do((function(x) {
  res <- summary(aov(value ~ type + batch, x))
  data.frame(p = res[[1]][["Pr(>F)"]][1])
}) (.))
pTab.aov$p.adj <- p.adjust(pTab.aov$p, method = "BH")

Expor the table to LaTex format using xtable

expTab <- pTab %>% ungroup() %>% mutate(measure = gsub("\\."," ",measure)) %>%
  rename("Seahorse measurement" = measure, "Difference of mean" = diff) %>%
  mutate(p = pTab.aov$p,  `adjusted p` = pTab.aov$p.adj) %>%
  select(-p.adj)
expTab[expTab$`Seahorse measurement` == "ECAR OCR ratio", 1] <- "ECAR/OCR"
fileConn <- file("section01/tTest_BcellVSCLL.tex")
writeLines(print(xtable(expTab, digits = 3, 
             caption = "ANOVA test results of energy metabolic measurements between CLL cells and normal B cells "), 
      include.rownames=FALSE,
      caption.placement = "top"), fileConn)
close(fileConn)

Beeswarms plot for select measurement

measureList <- c("basal.respiration","glycolysis","ATP.production","glycolytic.capacity","maximal.respiration","glycolytic.reserve")
gList <- lapply(measureList, function(seaName) {
  
  plotTab <- filter(seaTab, measure == seaName, !is.na(value))
  pval <- filter(pTab.aov, measure == seaName )$p
  
    
  #unit y (add unit to y axis, based on the type of measurement)
  if (seaName %in% c("basal.respiration","ATP.production","maximal.respiration")) {
    yLab <- "OCR (pMol/min)"
  } else yLab <- "ECAR (pMol/min)"
    
  #replace the "." in the mearement name with space
  seaName <- gsub("\\."," ",seaName)

  
  #plot title
  #plotTitle <- sprintf("p value = %s", format(pval, digits = 2, scientific = TRUE))
    plotTitle <- paste(sprintf("'%s (p = '~",seaName),
                     sciPretty(pval, digits = 2),"*')'")
  ggplot(plotTab, aes(x=type, y = value)) + 
       stat_boxplot(geom = "errorbar", width = 0.3) +
       geom_boxplot(outlier.shape = NA, col="black", width=0.4) + 
       geom_beeswarm(cex=2, size =0.5, aes(col = type)) + theme_classic() +
       xlab("") + ylab(yLab) + ggtitle(parse(text = plotTitle)) +
      theme(axis.line.x = element_blank(), axis.ticks.x = element_blank(),
             axis.title.y = element_text(size=10, face="bold"),
             axis.text = element_text(size=11),
             plot.title = element_text(size = 12, hjust=0.5),
             legend.position = "none",
             axis.text.x = element_text(face="bold",size=12)) +
    scale_color_manual(values = colorList)
  
})
beePlot <- grid.arrange(grobs = gList, ncol=2)

Combine the PCA plot and beeswarm plots (Figure 1)

plot_grid(pcaPlot, beePlot, labels= c("A","B"), rel_widths = c(1,1))


3 Impact of genetic heterogeneity on energy metabolism of CLL

3.1 Data pre-processing

Load data

data("lpdAll", "seaOri","seaCombat", "patmeta", "mutCOM")

Subsetting

#overlap between the patient samples in seahorse dataset and main screen dataset
lpdCLL <- lpdAll[,pData(lpdAll)$Diagnosis %in% "CLL"]
seaMain <- intersect(colnames(seaOri),colnames(lpdCLL))
length(seaMain)
## [1] 140
#CLL seahorse data
seaSub <- t(assays(seaOri[,seaMain])$seaMedian)

Get patient genetic background

#extract genetic background information
genBack <- exprs(lpdCLL)[fData(lpdCLL)$type %in%
                          c("gen","Methylation_Cluster","IGHV"), seaMain]  

genBack <- genBack[! rownames(genBack) %in% 
                     c("del13q14_bi","del13q14_mono"),]

genBack <- data.frame(t(genBack))
colnames(genBack) <- replace(colnames(genBack), colnames(genBack) == "del13q14_any", "del13q14")

3.2 Hypothesis testing: genetic variants vs bioenergetic features

Pre-processing genetic background matrix

#get genetic background information
geneSub <- genBack[seaMain,]

geneSub <- t(geneSub)

#filtering genetic background matrix. 
#Each sample should have at least 10 records 
#and each variant should be exist in at least 5 samples

geneSub <- geneSub[rowSums(!is.na(geneSub)) >= 10,]
geneSub <- geneSub[rowSums(geneSub >=1, na.rm = TRUE) >= 5, ]

seaTest <- t(seaSub)

Perform ANOVA-test, including batch as a co-variate.

#record missing values
sea.noNA <- !is.na(seaTest)
mut.noNA <- !is.na(geneSub)

#prepare matrix to store raw p-values
p.raw.mat <- mean.mat <- matrix(NA, nrow(seaTest),nrow(geneSub))
colnames(p.raw.mat) <- colnames(mean.mat) <- rownames(geneSub)
rownames(p.raw.mat) <- rownames(mean.mat) <- rownames(seaTest)

#calculate p-value for each measurement-variant combination.
for (i in 1:nrow(seaTest)) {
    for (j in 1:nrow(geneSub)) {
        com.noNA <- (sea.noNA[i,] & mut.noNA[j,])
        genotype <- geneSub[j, com.noNA]
        batch <- factor(colData(seaOri)[names(com.noNA[com.noNA]),]$dateMST)
        
        if (length(genotype) >= 10 & sum(genotype) >= 5) {
            seaGen <- seaTest[i,com.noNA]
            genotype <- as.factor(genotype)
            res <- summary(aov(seaGen ~ genotype + batch))
            p.raw.mat[i,j] <- res[[1]][5][1,]
            mean.mat[i,j] <- mean(seaGen[genotype==1]) - mean(seaGen[genotype==0])
        }
    }
}

Multiple hypothesis testing

#processing the p-value and multi-hypothesis correlations.
p.tab <- melt(p.raw.mat)
colnames(p.tab) <- c('Measurement','Variant','P.raw')
p.tab$FC <- melt(mean.mat)$value
p.tab$Measurement <- as.character(p.tab$Measurement)
p.tab$Variant <- as.character(p.tab$Variant)
p.tab <- filter(p.tab, !is.na(P.raw)) #remove NAs
p.tab$P.adj <- p.adjust(p.tab$P.raw, method = "BH")
#select and show the significant one (according to the raw p value)
p.tab.sig <- p.tab[p.tab$P.raw <= 0.05,]
p.tab.sig <- p.tab.sig[order(p.tab.sig$Measurement),]
hist(p.tab$P.raw, breaks=20, col= "lightgreen", 
     main = "Seahorse VS genetics", xlab = "raw P values")

Create a table show significant associations (for supplementary table)

#pCut <- 0.01

tabOut <- filter(p.tab, P.adj <= 0.1) %>% mutate(Measurement = formatSea(Measurement)) %>%
  mutate(Variant = ifelse(Variant == "IGHV.Uppsala.U.M", "IGHV status", Variant)) %>%
  select(Measurement, Variant, P.raw, FC, P.adj)

colnames(tabOut) <- c("Seahorse measurment", "Genetic variant", "p value", "Difference of mean", "adjusted p value")

write(print(xtable(tabOut, digits = 3, 
             caption = "ANOVA test results (adjusted for batch effect) of energy metabolic measurements related to different genetic variants"), 
      include.rownames=FALSE,
      caption.placement = "top"), file = "section02/tTest_SeahorseVSgene.tex")

Scatter plot of p values

#prepare table for plot

p.tab[p.tab$P.adj >= 0.05,]$Variant <- "not significant"

#prepare table for plot
plotTab <- mutate(p.tab, Variant = ifelse(Variant == "IGHV.Uppsala.U.M", "IGHV status", Variant))
plotTab$Measurement <- sapply(plotTab$Measurement, function(x) {gsub("\\."," ",x)})

#define color
colList <- c("#c0508a", "#79c858", "#7e45b9", "#c0ad52",
             "#8c8bbd", "#c35c41", "#86bca8", "#4b3e3a","grey80")
varName <- sort(as.character(unique(plotTab$Variant)))
varName <- c(varName[varName != "not significant"],"not significant")
names(colList) <- varName

#define legend order
plotTab$Variant <- factor(plotTab$Variant, levels = rev(names(colList)))
#fdr cut-off
fdrCut <- max(filter(plotTab, Variant != "not significant")$P.raw)

p <- ggplot(data=plotTab, aes(x= Measurement, y=-log10(P.raw),col=Variant))+ 
  geom_jitter(size=3, width = 0.15) + 
  geom_hline(yintercept = -log10(fdrCut), linetype="dotted") + 
  ylab(expression(-log[10]*'('*p~value*')')) + xlab("Seahore measurements") +
  theme_bw() + scale_color_manual(values = colList) +
  theme(axis.text.x = element_text(vjust=0.5, hjust = 1,size=15),
        axis.text.y = element_text(size=15),
        axis.title = element_text(size =18)) +
  annotate(geom = "text", x = 0.7, y = -log10(fdrCut) - 0.5, label = "5% FDR") +
  guides(color=guide_legend(title="Genetic variant")) +
  coord_flip()
plot(p)

Plot all significant associations as beeswarm plot

p.tab.sig <- p.tab[p.tab$Variant != "not significant",]

#batch effect corrected values should be used for plot
seaPlot <- assays(seaCombat)$seaMedian
seaPlot <- seaPlot[rownames(seaTest), colnames(seaTest)]

gList <- lapply(seq(1,nrow(p.tab.sig)), function(i) {
  seaName <- as.character(p.tab.sig[i,1])
  geneName <- as.character(p.tab.sig[i,2])
  pval <- p.tab.sig[i,3]
  
  plotTab <- tibble(Measurement = seaPlot[seaName,], Mutation = geneSub[geneName,]) %>% 
    filter(!is.na(Measurement), !is.na(Mutation))
  
  #for IGHV
  if (geneName == "IGHV.Uppsala.U.M") {
    geneName <- "IGHV status"
    plotTab <- mutate(plotTab, 
                      Status = ifelse(Mutation == 0, 
                                      sprintf("unmutated (n=%s)", sum(Mutation == 0)), 
                                      sprintf("mutated (n=%s)", sum(Mutation == 1))))
  } else if (geneName == "Methylation_Cluster") {
    plotTab <- mutate(plotTab,
                      Status = ifelse(Mutation == 0,sprintf("LP (n=%s)", sum(Mutation == 0)),
                                      ifelse(Mutation == 1, sprintf("IP (n=%s)", sum(Mutation == 1)),
                                             sprintf("HP (n=%s)", sum(Mutation == 2)))))
  } else plotTab <- mutate(plotTab, 
                           Status = ifelse(Mutation == 0,
                                          sprintf("wild type (n=%s)", sum(Mutation == 0)), 
                                          sprintf("mutated (n=%s)", sum(Mutation == 1))))
  
  #reverse label factor (wildtype always on the rigt)
  plotTab <- mutate(plotTab, Status = factor(Status)) %>% 
    mutate(Status = factor(Status, levels = rev(levels(Status))))
  
  # color scheme, black for wildtype, red for mutated
  if (length(levels(plotTab$Status)) == 2) {
    colorList <- c("black","red")
    names(colorList) <- levels(plotTab$Status)
  } else {
    colorList <- c("lightblue","blue", "darkblue")
    names(colorList) <- levels(plotTab$Status)
  }
  
  
  #set y label
  if (seaName %in% c("ECAR.OCR.ration")) {
    yLab = "ECAR/OCR"
  } else if (seaName %in% c("maximal.respiration","spare.respiratory.capacity","basal.respiration",
                            "ATP.production","OCR","proton.leak")) {
    yLab = "OCR (pMol/min)" } else yLab = "ECAR (pMol/min)"
  
  #replace the "." in the mearement name with space
  seaName <- gsub("\\."," ",seaName)
  
  #plot title
  plotTitle <- paste(sprintf("'%s ~ %s (p = '~",seaName,geneName),
                     sciPretty(pval, digits = 2),"*')'")
  
  ggplot(plotTab, aes(x=Status, y = Measurement)) + 
      stat_boxplot(geom = "errorbar", width = 0.3) +
      geom_boxplot(outlier.shape = NA, col="black", width=0.4) + 
      geom_beeswarm(cex=2, size =1, aes(col = Status)) + theme_classic() +
      xlab("") + ylab(yLab) + ggtitle(parse(text=plotTitle)) + 
      scale_color_manual(values = colorList) +
      theme(axis.line.x = element_blank(), axis.ticks.x = element_blank(),
             axis.title = element_text(size=18, face="bold"),
             axis.text = element_text(size=18),
             plot.title = element_text(hjust=0.5,size=15),
             legend.position = "none",
             axis.title.x = element_text(face="bold"))
  
})

Combine p value scatter plot and beeswarm plots (Figure 2)

ggdraw() +
  draw_plot(p, 0, 0, 0.6, 1) +
  draw_plot(gList[[13]], 0.6, 0 , .4, .48) +
  draw_plot(gList[[15]], 0.6, 0.5 , .4, .48) +
  draw_plot_label(c("A", "B", "C"), 
                  c(0, 0.58, 0.58), c(1, 1, 0.5), size = 20)

Plot the rest in a separate pdf file (For supplementary figure)

gList[c(13,15)] <- NULL
grid.arrange(grobs = gList, ncol=2)


4 Associations between drug response phenotype and energy metabolism of CLL

4.1 Data pre-processing

Load data set

data("lpdAll","drugs", "patmeta", "seaCombat", "conctab")

Sample subsetting

#get drug response data for CLL samples only
lpdCLL <- lpdAll[fData(lpdAll)$type == "viab",pData(lpdAll)$Diagnosis == "CLL"]

#get overlapped samples
sampleOverlap <- intersect(colnames(lpdCLL), colnames(seaCombat))
seaSub <- seaCombat[,sampleOverlap]
lpdCLL <- lpdCLL[,sampleOverlap]

How many overlapped samples?

length(sampleOverlap)
## [1] 140

Remove bad drugs. Bortezomib lost its activity during storage. The data for this drug and NSC 74859 were discarded from further analysis.

badrugs = c("D_008", "D_025") 
lpdCLL <- lpdCLL[!fData(lpdCLL)$id %in% badrugs,]

Get drug response data

# get drug responsee data
get.drugresp <- function(lpd) {
  drugresp = t(exprs(lpd[fData(lpd)$type == 'viab'])) %>%
    tbl_df %>% dplyr::select(-ends_with(":5")) %>%
    dplyr::mutate(ID = colnames(lpd)) %>%
    tidyr::gather(drugconc, viab, -ID) %>%
    dplyr::mutate(drug = drugs[substring(drugconc, 1, 5), "name"],
           conc = sub("^D_([0-9]+_)", "", drugconc)) %>%
    dplyr::mutate(conc = as.integer(gsub("D_CHK_", "", conc)))
  
  drugresp
}
drugresp <- get.drugresp(lpdCLL)

Use median polish to summarise drug response of the five concentrations

get.medp <- function(drugresp) {
  tab = drugresp %>% group_by(drug, conc) %>% 
    do(data.frame(v = .$viab, ID = .$ID)) %>% spread(ID, v)
  
  med.p = lapply(unique(tab$drug), function(n) {
    tb = filter(tab, drug == n) %>% ungroup() %>% select(-(drug:conc)) %>% 
      as.matrix %>% `rownames<-`(1:5)
    mdp = stats::medpolish(tb)
    df = as.data.frame(mdp$col) + mdp$overall
    colnames(df) <- n
    df
  }) %>% do.call(cbind,.)
  
  medp.viab = tbl_df(med.p) %>% mutate(ID = rownames(med.p)) %>%
    gather(drug, viab, -ID) 
  medp.viab
}
drugresp.mp <- get.medp(drugresp)

4.2 Association test between drug response and seahorse measurements

4.2.1 Function to calculate correlation given a drug table and seahorse table

corTest <- function(patID, viab, seaTable, ighv = NULL) {
  viab <- setNames(viab, patID)
  
  corTab <- lapply(seq(1,nrow(seaTable)), function(i) {
    seaName <- rownames(seaTable)[i]
    
    #remove NA samples in Seahorse entry
    seaVal <- seaTable[seaName,]
    seaVal <- seaVal[!is.na(seaVal)]
    
    
    #get useable sample list
    if (!is.null(ighv)) {
      patList <- intersect(names(ighv), intersect(patID, names(seaVal)))
      ighvVal <- ighv[patList]
    } else {
      patList <- intersect(patID, names(seaVal))
    }
    
    #subset drug value
    drugVal <- viab[patList]
    seaVal <- seaVal[patList]
    
    #correlation test, block for IGHV
    if (!is.null(ighv)) {
      res <- summary(lm(seaVal ~ drugVal + ighvVal))
      data.frame(seahorse = seaName, 
               p = res$coefficients[2,4], 
               coef =  sqrt(res$r.squared) * sign(res$coefficients[2,3]),
               stringsAsFactors = FALSE)
    } else {
      res <- cor.test(seaVal, drugVal, method = "pearson")
      data.frame(seahorse = seaName, 
         p = res$p.value, 
         coef =  res$estimate[[1]],
         stringsAsFactors = FALSE)
      
    }
  }) %>% do.call(rbind,.)
}

4.2.2 Correlation test without blocking for IGHV

Calculate correlation coefficient and p values

seaTest <- assays(seaSub)$seaMedian
resTab.noBlock <- group_by(drugresp.mp, drug) %>% do(corTest(.$ID, .$viab, seaTest, ighv = NULL)) %>% ungroup() %>%
  mutate(p.adj = p.adjust(p, method = "BH"))

How many significant associations at 10% FDR?

resTab.noBlock %>% filter(p.adj <= 0.1) %>% nrow()
## [1] 118

How many drugs show at least one significant assocations?

resTab.noBlock %>% filter(p.adj <= 0.1) %>% filter(!duplicated(drug)) %>% nrow()
## [1] 32

4.2.3 Correlation test with blocking for IGHV

Calculate correlation coefficient and p values

#get IGHV stauts
ighv <- exprs(lpdAll)["IGHV Uppsala U/M",]
ighv <-ighv[!is.na(ighv)]


resTab <- group_by(drugresp.mp, drug) %>% do(corTest(.$ID, .$viab, seaTest, ighv = ighv)) %>% ungroup() %>%
  mutate(p.adj = p.adjust(p, method = "BH"))

How many significant associations at 10% FDR?

resTab %>% filter(p.adj <= 0.1) %>% nrow()
## [1] 45

How many drugs show at least one significant assocations?

resTab %>% filter(p.adj <= 0.1) %>% filter(!duplicated(drug)) %>% nrow()
## [1] 19

4.3 Plot of distribution of correlations coefficient

ggplot(resTab.noBlock, aes(x=coef)) + geom_histogram(binwidth = 0.05, col = "darkblue", fill = "lightblue") + theme_classic() + 
  theme(panel.grid = element_blank(), axis.title = element_text(size = 12, face = "bold"),
        axis.text = element_text(size=12), plot.title = element_text(size=15, hjust =.5, face = "bold")) + xlab("Pearson correlation coefficient")

4.4 P-value scatter plot

4.4.1 Correlation test with blocking for IGHV status (Figure 4)

Preocess table for plotting

atLeastOne <- group_by(resTab, drug) %>% summarise(sigNum = sum(p.adj <= 0.1)) %>% filter(sigNum > 0)
plotTab <- filter(resTab, drug %in% atLeastOne$drug) %>% 
  mutate(seahorse = ifelse(p.adj > 0.1, "not significant", seahorse))

#change mearement name 
plotTab$seahorse <- sapply(plotTab$seahorse, function(x) {gsub("\\."," ",x)})

#define the group of seahorse measurement
measureType <- tibble(measure = rownames(seaCombat), type = rowData(seaCombat)$type) %>% 
  mutate(type = ifelse(type %in% c("ECAR.OCR","GST","ECAR"), "glycolysis", "respiration"))

#generate color list separately for each group
glyList <- setNames(tail(brewer.pal(9,"OrRd"),nrow(filter(measureType, type =="glycolysis"))),
                    filter(measureType, type =="glycolysis")$measure)
resList <- setNames(tail(brewer.pal(9,"GnBu"),nrow(filter(measureType, type =="respiration"))),
                    filter(measureType, type =="respiration")$measure)
nosig <- c("not significant" = "grey80")
colList <- c(glyList, resList, nosig)
names(colList) <- sapply(names(colList), function(x) {gsub("\\."," ",x)})

#order the factor for seahorse measurment
plotTab$seahorse <- factor(plotTab$seahorse, levels = names(colList))

#add the direction of correlation
plotTab <- mutate(plotTab, Direction = ifelse(coef > 0, "positive", "negative"))

#get the cutoff value
fdrCut <- max(filter(plotTab, seahorse != "not significant")$p)

Plot

p <- ggplot(data = plotTab, aes(x = drug,y=-log10(p), fill = seahorse, color = seahorse, shape = Direction)) + 
  geom_point(size=4) + scale_fill_manual(values = colList) + scale_color_manual(values = colList) +
  geom_hline(yintercept = -log10(fdrCut), linetype="dotted") + 
  ylab(expression(-log[10]*'('*p~value*')')) + xlab("") + theme_bw() + 
  scale_shape_manual(values = c(positive = 24, negative = 25)) +
  theme(axis.text.x = element_text(angle = 90, vjust=0.5, hjust = 1,size=15),
        axis.text.y = element_text(size=15),
        axis.title = element_text(size =15, face = "bold")) +
  guides(fill="none", color = guide_legend(title = "Measurement"))

plot(p)

4.4.2 Correlation test without blocking for IGHV status (Supplementary Figure)

Preocess table for plotting

atLeastOne <- group_by(resTab.noBlock, drug) %>% summarise(sigNum = sum(p.adj <= 0.1)) %>% filter(sigNum > 0)
plotTab <- filter(resTab.noBlock, drug %in% atLeastOne$drug) %>% 
  mutate(seahorse = ifelse(p.adj > 0.1, "not significant", seahorse))

#change mearement name 
plotTab$seahorse <- sapply(plotTab$seahorse, function(x) {gsub("\\."," ",x)})

#define the group of seahorse measurement
measureType <- tibble(measure = rownames(seaCombat), type = rowData(seaCombat)$type) %>% 
  mutate(type = ifelse(type %in% c("ECAR.OCR","GST","ECAR"), "glycolysis", "respiration"))

#generate color list separately for each group
glyList <- setNames(tail(brewer.pal(9,"OrRd"),nrow(filter(measureType, type =="glycolysis"))),
                    filter(measureType, type =="glycolysis")$measure)
resList <- setNames(tail(brewer.pal(9,"GnBu"),nrow(filter(measureType, type =="respiration"))),
                    filter(measureType, type =="respiration")$measure)

nosig <- c("not significant" = "grey80")
colList <- c(glyList, resList, nosig)
names(colList) <- sapply(names(colList), function(x) {gsub("\\."," ",x)})

#order the factor for seahorse measurment
plotTab$seahorse <- factor(plotTab$seahorse, levels = names(colList))

#add direction iformation
plotTab <- mutate(plotTab, Direction = ifelse(coef > 0, "positive", "negative"))

#get the cutoff value
fdrCut <- max(filter(plotTab, seahorse != "not significant")$p)
p <- ggplot(data = plotTab, aes(x = drug,y=-log10(p), fill = seahorse, color = seahorse, shape = Direction)) + 
  geom_point(size=4) + scale_fill_manual(values = colList) + scale_color_manual(values = colList) +
  geom_hline(yintercept = -log10(fdrCut), linetype="dotted") + 
  ylab(expression(-log[10]*'('*p~value*')')) + xlab("") + theme_bw() + 
  scale_shape_manual(values = c(positive = 24, negative = 25)) +
  theme(axis.text.x = element_text(angle = 90, vjust=0.5, hjust = 1,size=15),
        axis.text.y = element_text(size=15),
        axis.title = element_text(size =15, face = "bold")) +
  guides(fill="none", color = guide_legend(title = "Measurement"))

plot(p)

4.5 Scatter plot of associations

Scatter plot for all significant pairs

resTab.sig <- filter(resTab, p.adj <= 0.1)

scatterList <- lapply(seq(1,nrow(resTab.sig)), function(i){
  seaName <- resTab.sig[i,]$seahorse
  p <- resTab.sig[i,]$p
  coef <- format(resTab.sig[i,]$coef,digits = 2)
  drugName <- resTab.sig[i,]$drug
  
  #remove NA samples in Seahorse entry
  seaVal <- seaTest[seaName,]
  seaVal <- seaVal[!is.na(seaVal)]
  
  #get useable sample list
  patList <- intersect(filter(drugresp.mp, drug == drugName)$ID, names(seaVal))
  
  #set y label
  if (seaName %in% c("ECAR.OCR.ration")) {
    xLab = "ECAR/OCR"
  } else if (seaName %in% c("maximal.respiration","spare.respiratory.capacity","basal.respiration",
                            "ATP.production")) {
    xLab = "OCR (pMol/min)" } else xLab = "ECAR (pMol/min)"
  
  #format seahorse measurement name
  seaName.new <- ifelse(seaName == "ECAR.OCR.ratio", "ECAR/OCR", gsub("\\."," ",seaName))
  
  #prepare title
  plotTitle <- sprintf("%s ~ %s", drugName, seaName.new)
  
  #prepare plot table
  plotTab <- filter(drugresp.mp, drug == drugName, ID %in% patList) %>%
    mutate(sea=seaVal[ID])
  
  #prepare correlation test annotations
  annoText <- paste("'coefficient ='~",coef,"*","', p ='~",sciPretty(p,digits=2))
  limX <- max(plotTab$sea) + 2
  midX <- max(plotTab$sea)/2
  ggplot(plotTab, aes(x=sea,y=100*viab)) + geom_point(size=1) + 
    geom_smooth(method = "lmrob", se= FALSE) +
    xlab(xLab) + ylab("% viability after drug treatment") +
    theme_bw() + ggtitle(plotTitle) + coord_cartesian(xlim = c(-2,limX)) +
    annotate("text", x = midX, y = Inf, label = annoText, 
             vjust=1, hjust=0.5, size = 5, parse = TRUE, col= "darkred") +
    theme(panel.grid = element_blank(),
          axis.title = element_text(size=13, face="bold"),
          axis.text = element_text(size=12),
          plot.title = element_text(face="bold", hjust=0.5, size = 15 ),
          legend.position = "none",
          axis.title.x = element_text(face="bold"),
          plot.margin = unit(c(0.5,0.5,0.5,0.5), "cm"))
})

names(scatterList) <- paste0(resTab.sig$seahorse, "_", resTab.sig$drug)

A figure of selected drugs (Figure 5)

plot_grid(scatterList$glycolysis_rotenone, 
          scatterList$glycolysis_venetoclax,
          scatterList$glycolytic.capacity_orlistat,
          scatterList$`ECAR_KX2-391`,ncol=2)
## Warning: Computation failed in `stat_smooth()`:
## object 'lmrob' of mode 'function' was not found

## Warning: Computation failed in `stat_smooth()`:
## object 'lmrob' of mode 'function' was not found

## Warning: Computation failed in `stat_smooth()`:
## object 'lmrob' of mode 'function' was not found

## Warning: Computation failed in `stat_smooth()`:
## object 'lmrob' of mode 'function' was not found

4.6 Association test for individual concentrations

Association tests for each concentration

corRes_conc <- group_by(drugresp, drug, conc) %>% do(corTest(.$ID, .$viab, seaTest, ighv = NULL)) %>% ungroup() %>%
  mutate(p.adj = p.adjust(p, method = "BH"))

Prepare plot tab

drugList <- unique(filter(corRes_conc, p.adj < 0.1)$drug)
seaList <- unique(filter(corRes_conc, p.adj < 0.1)$seahorse)

plotTab <- filter(corRes_conc, drug %in% drugList,
                  seahorse %in% seaList) %>% mutate(concIndex = paste0("c",conc)) %>%
  mutate(coef = ifelse(p.adj < 0.1, coef, 0),
         seahorse = ifelse(seahorse != "ECAR.OCR.ratio", seahorse,
                              "ECAR/OCR")) %>%
  mutate(seahorse = gsub("[.]","\n",seahorse)) %>%
  mutate(seahorse = factor(seahorse))

#plot similar seahorse measurment together
plotTab$seahorse <- factor(plotTab$seahorse, 
                              levels = levels(plotTab$seahorse)[c(3,4,5,6,7,
                                                                     9,1,2,8,11,10)])

Heatmap plot for p values (Supplementary Figure)

ggplot(plotTab, aes(x=concIndex, y = drug, fill = coef)) + geom_tile(size = 0.3, color = "white") + facet_wrap(~ seahorse, nrow = 1) + 
  scale_fill_gradient2(low = "red", mid = "white", high = "blue", midpoint = 0,
                       limits =c(-0.6,0.6), labels = seq(-0.8,0.8, by = 0.2),
                       breaks = seq(-0.8,0.8, by = 0.2),
                       name = "Coefficient") +
  theme_bw() + theme(strip.text = element_text(face = "bold"),
                     axis.text.y = element_text(size =12)) +
  ylab("Drug name") + xlab("Concentration Index")

4.7 Correlation of the responses between rotenone and orlistat

Get the drug response data of rotenone and orlistat

lpdCLL <- lpdAll[fData(lpdAll)$type == "viab",pData(lpdAll)$Diagnosis == "CLL"]
lpdSub <- lpdCLL[fData(lpdCLL)$name %in% c("rotenone","orlistat") & 
                     ! fData(lpdCLL)$subtype %in% c("4:5","1:5"),]
fData(lpdSub)$conc <- sapply(seq(1,nrow(lpdSub)), function(i) {
  conctab[fData(lpdSub)[i,]$id, paste0("c",fData(lpdSub)[i,]$subtype)]
})

viabTab <- data.frame(exprs(lpdSub)) %>% rownames_to_column("drugID") %>% 
  gather(key = "patID", value = "viab", -drugID) %>% as_tibble() %>%
  mutate(conc = fData(lpdSub)[drugID,]$conc,
         name = fData(lpdSub)[drugID,]$name) %>%
  select(-drugID) %>% spread(key = name, value = viab)

Perform correlation test

testTab <- viabTab %>% group_by(conc) %>%
  do(data.frame(p = cor.test(.$rotenone, .$orlistat)$p.value,
                coef = cor(.$rotenone, .$orlistat)))

Plot the correlations (Supplementary Figure)

pList <- lapply(seq(nrow(testTab)), function(i) {
  p = testTab[i,]$p
  coef = format(testTab[i,]$coef,digits = 2)
  conc1 = testTab[i,]$conc
  plotTab <- filter(viabTab, conc == conc1)
  
  annoText <- paste("'coefficient ='~",coef,"*","', p ='~",sciPretty(p,digits=2))
  ggplot(plotTab, aes(x=100*rotenone, y = 100*orlistat)) + geom_point() + geom_smooth(method = "lmrob", se = FALSE) +
    coord_cartesian(xlim = c(0,120), ylim = c(0,120)) + 
    ggtitle(bquote("Concentration ="~.(conc1)~mu*"M")) +
    annotate("text", x = 0, y = 123, label = annoText, vjust=1, hjust=0, size =4 , color = "darkred",parse = TRUE) +
    xlab("rotenone (% viability)") + ylab("orlistat (% viability)") + 
    theme_bw() +
    theme(panel.grid = element_blank(),
      axis.title = element_text(size=15, face="bold"),
      axis.text = element_text(size=10),
      plot.title = element_text(face="bold", hjust=0.5, size = 15 ),
      legend.position = "none",
      axis.title.x = element_text(face="bold"),
      plot.margin = unit(c(0.5,0.5,0.5,0.5), "cm"))
})
grid.arrange(grobs= pList, ncol =2)


5 Transcriptomic analysis

5.1 Differential expression between M-CLL and U-CLL samples

5.1.1 Data pre-processing

Load data set

data("dds", "patmeta","mutCOM","seaOri")

Only use CLL samples with IGHV annotations and have been used in seahorse experiments

#annotate IGHV status
dds$IGHV <- patmeta[match(dds$PatID, rownames(patmeta)),]$IGHV
dds$diag <- patmeta[match(dds$PatID, rownames(patmeta)),]$Diagnosis

#estimate size factor
dds <- estimateSizeFactors(dds)

#only choose CLL samples with IGHV annotations and have been used in seahorse experiments
ddsCLL <- dds[,dds$diag == "CLL" & !is.na(dds$IGHV) & dds$PatID %in% colnames(seaOri)]

#how many samples do we have?
ncol(ddsCLL)
## [1] 111

Filter genes

#remove genes without gene symbol annotations
ddsCLL <- ddsCLL[! rowData(ddsCLL)$symbol %in% c(NA,""),]
ddsCLL <- ddsCLL[rowData(ddsCLL)$chromosome != "Y",]

#only keep genes that have counts higher than 10 in any sample
keep <- apply(counts(ddsCLL), 1, function(x) any(x >= 10)) 
ddsCLL <- ddsCLL[keep,]

# Remove transcripts which do not show variance across samples.
sds <- rowSds(counts(ddsCLL, normalized = TRUE))
sh <- shorth(sds)
ddsCLL <- ddsCLL[sds >= sh,]

#how many genes do we have
nrow(ddsCLL)
## [1] 13744

Variance stabilizing tranformation

ddsCLL.norm <- varianceStabilizingTransformation(ddsCLL)

5.1.2 Identify differentially expressed gene between M-CLL and U-CLL using DESeq2

Differential expression using DESeq2

ddsCLL$IGHV <- factor(ddsCLL$IGHV, levels = c("U", "M"))
design(ddsCLL) <- ~ IGHV
ddsCLL <- DESeq(ddsCLL, betaPrior = TRUE)
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 720 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing

Get differential expression result

DEres <- as.tibble(results(ddsCLL, tidy = TRUE)) %>% mutate(symbol = rowData(ddsCLL)$symbol)

5.2 Gene enrichment analysis

Function for converting DEseq results to enrichment analysis input

createInput <- function(DEres, pCut = 0.05, ifFDR = FALSE, rankBy = "stat") {
  if (ifFDR) {
    inputTab <- filter(DEres, padj <= pCut)
  } else {
    inputTab <- filter(DEres, pvalue <= pCut)
  }
  
  inputTab <- arrange(inputTab, pvalue) %>% filter(!duplicated(symbol)) %>% select_("symbol", rankBy) %>% data.frame(stringsAsFactors = FALSE)
  rownames(inputTab) <- inputTab$symbol
  inputTab$symbol <- NULL
  colnames(inputTab) <- "stat"
  return(inputTab)
}

load genesets

gmts = list(H=system.file("extdata","h.all.v5.1.symbols.gmt", package="BloodCancerMultiOmics2017"),
            C6=system.file("extdata","c6.all.v5.1.symbols.gmt", package="BloodCancerMultiOmics2017"),
            KEGG=system.file("extdata","c2.cp.kegg.v5.1.symbols.gmt", package = "BloodCancerMultiOmics2017"))

Enrichment analysis using Hallmarks gene set

enRes <- list()
inputTab <- createInput(DEres, pCut = 0.1, ifFDR = TRUE)
enRes[["Gene enrichment analysis"]] <- runGSEA(inputTab = inputTab, gmtFile = gmts$H, GSAmethod = "page")
## Checking arguments...done!
## Calculating gene set statistics...done!
## Calculating gene set significance...done!
## Adjusting for multiple testing...done!
#remove the HALLMARK_
enRes$`Gene enrichment analysis`$Name <- gsub("HALLMARK_","", enRes$`Gene enrichment analysis`$Name)

Plot hallmark result

enBar <- plotEnrichmentBar(enRes, pCut = 0.1, ifFDR = TRUE, setName = "Hallmark gene sets")
plot(enBar)

5.2.1 Heatmap plot of the expression values of genes in the glycolysis geneset

Prepare the data for heatmap

# load genes in the gene set
gsc <- loadGSC(gmts$H)
geneList <- gsc$gsc$HALLMARK_GLYCOLYSIS

#select differentially expressed genes
fdrCut <- 0.10
sigDE <- filter(DEres, padj <= fdrCut, log2FoldChange < 0) %>% filter(symbol %in% geneList) %>%
  arrange(log2FoldChange)

#get the expression matrix
plotMat <- assay(ddsCLL.norm[sigDE$row,])
colnames(plotMat) <- ddsCLL.norm$PatID
rownames(plotMat) <- sigDE$symbol

#sort columns of plot matrix based on trisomy12 status
plotMat <- plotMat[,order(ddsCLL$IGHV)]

#calculate z-score and sensor
plotMat <- t(scale(t(plotMat)))
plotMat[plotMat >= 4] <- 4
plotMat[plotMat <= -4] <- -4

annoCol <- data.frame(row.names = ddsCLL.norm$PatID, `IGHV` = ddsCLL.norm$IGHV)

Plot the heatmap

#color for colum annotation
annoColor <- list(IGHV = c(M = "red", U = "grey80"))

hallHeatmap <- pheatmap(plotMat, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(100),
         cluster_cols = FALSE, cluster_rows = FALSE,
         annotation_col = annoCol, annotation_colors = annoColor,
         show_colnames = FALSE, fontsize_row = 8, breaks = seq(-5,5, length.out = 101), treeheight_row = 0,
         border_color = NA, main = "HALLMARK_GLYCOLYSIS",silent = TRUE)$gtable

grid.draw(hallHeatmap)

5.2.2 Beeswarm plot of expression values of key glycolytic genes

plotGenes <- c("PFKP","PGAM1","PGK1")
plotTab <- data.frame(assay(ddsCLL.norm[rowData(ddsCLL.norm)$symbol %in% plotGenes,]))
colnames(plotTab) <- ddsCLL.norm$PatID
plotTab$symbol <- rowData(ddsCLL.norm[rowData(ddsCLL.norm)$symbol %in% plotGenes,])$symbol
plotTab <- gather(plotTab, key = "PatID", value = "value", -symbol) %>%
  mutate(IGHV = colData(ddsCLL.norm)[match(PatID, ddsCLL.norm$PatID),]$IGHV)

beePlot <- ggplot(plotTab, aes(x=IGHV, y = value)) + 
    stat_boxplot(geom = "errorbar", width = 0.3) +
    geom_boxplot(outlier.shape = NA, col="black", width=0.4) + 
    geom_beeswarm(cex=2, size =0.5, aes(col = IGHV)) + theme_classic() +
    xlab("") + ylab("normalized expression") +
    scale_color_manual(values = c("M" = "red","U" = "grey30")) +
    theme(axis.line.x = element_blank(), axis.ticks.x = element_blank(),
           axis.title = element_text(size=12, face="bold"),
           axis.text = element_text(size=12),
           plot.title = element_text(face="bold", hjust=0.5),
           legend.position = "none",
           axis.title.x = element_text(face="bold"),
          strip.background = element_blank(),
          strip.text = element_text(size=15, face = "bold")) +
  facet_wrap(~ symbol, scales = "free")

5.2.3 Combined plots for manuscript (Figure 3)

ggdraw() + 
  draw_plot(enBar, 0, 0.5, 0.5, 0.45) + 
  draw_plot(hallHeatmap, 0.5, 0, 0.5, 0.95) +
  draw_plot(beePlot, 0, 0, 0.5, 0.4) +
  draw_plot_label(c("A","B","C"), c(0, 0.5, 0), c(1, 1, 0.45), fontface = "plain", size=20)


6 Association between the clinical phenotype and energy metabolic features

6.1 Correlation between seahorse measurement and pretreatment status

6.1.1 Correlation test

Load data set

data("patmeta", "seaCombat","lpdAll", "pretreat","doublingTime")

Prepare data table for t-test

testTab <- assays(seaCombat)$seaMedian %>% data.frame() %>% rownames_to_column("Measurement") %>%
  gather(key = "patID", value = "value", -Measurement) %>%
  mutate(pretreated = pretreat[patID,]) %>%
  mutate(pretreated = factor(pretreated),
         IGHV = factor(exprs(lpdAll)["IGHV Uppsala U/M",patID])) %>% 
  as.tibble()

Performing t-test for each measurement

tTest <- function(x, y) {
  noNA <- !is.na(x) & !is.na(y)
  res <- t.test(x[noNA] ~ y[noNA], equal.var = TRUE)
  tibble(p = res$p.value,
         dm = res$estimate[[2]] - res$estimate[[1]])
}

pTab <- group_by(testTab, Measurement) %>% do(tTest(.$value, .$pretreated)) %>% ungroup() %>%
  mutate(p.adj = p.adjust(p, method = "BH"))

Perform ANOVA test, including IGHV as a blocking factor

aovTest <- function(x,y,block) {
  noNA <- !is.na(x) & !is.na(y) & !is.na(block)
  dataTab <- data.frame(x = x[noNA], y = y[noNA], block = block[noNA])
  res <- anova(lm(x ~ y + block))
  tibble(p = res$`Pr(>F)`[1])
}

pTab.IGHV <- group_by(testTab, Measurement) %>% do(aovTest(.$value, .$pretreated, .$IGHV)) %>%
  ungroup() %>% mutate(p.adj = p.adjust(p, method = "BH"))

A table as output

outTable <- tibble(`Seahorse mearuement` = formatSea(pTab$Measurement),
                   `p value` = format(pTab$p, digits = 2),
                   `adjusted p` = format(pTab$p.adj, digits = 3),
                   `p value (IGHV blocked)` = format(pTab.IGHV$p, digits = 2),
                   `adjusted p (IGHV blocked)` = format(pTab.IGHV$p.adj, digits = 3))

write(
print(xtable(outTable, digits = 3, 
             caption = "Student's t-test results of energy metabolic measurements related to pretreatment status"), 
      include.rownames=FALSE,
      caption.placement = "top")
,file = "section05/tTest_SeahorseVSpretreat.tex")

Association between IGHV status and pretreatment

IGHVtab <- filter(testTab, !duplicated(patID))
chiRes <- chisq.test(IGHVtab$pretreated, IGHVtab$IGHV)
chiRes
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  IGHVtab$pretreated and IGHVtab$IGHV
## X-squared = 18.157, df = 1, p-value = 2.034e-05

6.1.2 Plot significant associations as beeswarms plot (for supplementary figures)

Plot

seaList <- c("maximal.respiration", "spare.respiratory.capacity")
plotList <- lapply(seaList, function(seaName) {
  #prepare table for plotting
  plotTab <- filter(testTab, Measurement == seaName) %>% 
    mutate(Measurement = formatSea(Measurement),
           pretreated = ifelse(pretreated ==1, 
                               sprintf("yes (n=%s)", sum(pretreated == 1)), 
                               sprintf("no (n=%s)", sum(pretreated == 0))),
           IGHV = ifelse(is.na(IGHV), "unknown", 
                         ifelse(IGHV == 1, "mutated", "unmutated"))) %>%
    mutate(IGHV = factor(IGHV, levels = c("mutated","unmutated","unknown"))) %>%
    filter(!is.na(value))
  
  #p value for annotation
  pval <- filter(pTab, Measurement == seaName)$p
  
  #color for IGHV status
  colorList <- c(mutated = "red", unmutated = "black", unknown = "grey80")
  
  #formatted seaname
  seaTitle <- unique(plotTab$Measurement)
  
  #title 
  plotTitle <- paste(sprintf("'%s (p = '~",seaTitle),
                     sciPretty(pval, digits = 2),"*')'")
  
  ggplot(plotTab, aes(x=pretreated, y = value)) + 
    stat_boxplot(geom = "errorbar", width = 0.3) +
    geom_boxplot(outlier.shape = NA, col="black", width=0.4) + 
    geom_beeswarm(cex=2, size =1, aes(col = IGHV)) + theme_classic() +
    xlab("") + ylab("OCR (pMol/min)") + ggtitle(parse(text=plotTitle)) + 
    scale_color_manual(values = colorList) +
    theme(axis.line.x = element_blank(), axis.ticks.x = element_blank(),
           axis.title = element_text(size=12, face="bold"),
           axis.text = element_text(size=12),
           plot.title = element_text(hjust=0.5,size=15),
           axis.title.x = element_text(face="bold"))
    
})

Save the plot

grid.arrange(grobs = plotList, ncol =2)

6.2 Correlation between seahorse measurement and lymphocyte doubling time

6.2.1 Correlation test

Pre-processing data

testTab <- assays(seaCombat)$seaMedian %>% data.frame() %>% rownames_to_column("Measurement") %>%
  gather(key = "patID", value = "value", -Measurement) %>%
  mutate(pretreated = pretreat[patID,],
         doubling.time = LDT[patID,]) %>%
  mutate(pretreated = factor(pretreated),
         IGHV = factor(exprs(lpdAll)["IGHV Uppsala U/M",patID])) %>% 
  filter(!is.na(doubling.time), !is.na(value)) %>%
  as.tibble()

Correlation test between seahorse measurement and doubling time

corTest <- function(x, y, block = NULL, method = "pearson") {
  if (is.null(block)) {
    res <- cor.test(x,y, method = method)
    tibble(p = res$p.value, coef = res$estimate[[1]])
  } else {
    tab <- data.frame(x = x, y = y, block = block)
    res <- summary(lm( y ~ x + block, tab)) #how much y can be explained by x and block
    tibble(p = res$coefficients[2,4],
           coef = cor(x,y))
  }
}

corRes <- group_by(testTab, Measurement) %>% do(corTest(.$value, .$doubling.time)) %>%
  ungroup() %>% mutate(p.adj = p.adjust(p, method = "BH"))

Correlations between lymphocyte doubling time and IGHV stauts

pRes <- filter(testTab, !duplicated(patID)) %>% 
  do(data.frame(p = t.test(doubling.time ~ IGHV,.)$p.value))
pRes
##              p
## 1 2.635161e-07

Correlation test between seahorse measurement and doubling time (Considering IGHV as cofactor)

corRes.aov <- group_by(testTab, Measurement) %>% filter(!is.na(IGHV)) %>%
  do(corTest(x = .$value, y = .$doubling.time, block = .$IGHV)) %>%
  ungroup() %>% mutate(p.adj = p.adjust(p, method = "BH"))

Correlation test between seahorse measurement and doubling time (within M-CLL)

corRes.M <- group_by(testTab, Measurement) %>% filter(IGHV %in% 1) %>%
  do(corTest(x = .$value, y = .$doubling.time)) %>%
  ungroup() %>% mutate(p.adj = p.adjust(p, method = "BH"))

Correlation test between seahorse measurement and doubling time (within U-CLL)

corRes.U <- group_by(testTab, Measurement) %>% filter(IGHV %in% 0) %>%
  do(corTest(x = .$value, y = .$doubling.time)) %>%
  ungroup() %>% mutate(p.adj = p.adjust(p, method = "BH"))

A table for output

outTable <- tibble(`Seahorse mearuement` = formatSea(corRes$Measurement),
                   `p value` = format(corRes$p, digits = 2),
                   `adjusted p` = format(corRes$p.adj, digits = 3),
                   `p value (IGHV blocked)` = format(corRes.aov$p, digits = 3),
                   `adjusted p (IGHV blocked)` = format(corRes.aov$p.adj, digits = 3))

write(
print(xtable(outTable, digits = 3, 
             caption = "Correlation tests between each Seahorse measurements and lymphocyte doubling time"), 
      include.rownames=FALSE,
      caption.placement = "top")
,"section05/tTest_SeahorseVSldt.tex")

6.2.2 Plot correlations (for supplementary figure)

seaList <- c("glycolysis", "glycolytic.capacity")
plotList.cor <- lapply(seaList, function(seaName) {
  #prepare table for plotting
  plotTab <- filter(testTab, Measurement == seaName) %>% 
    mutate(Measurement = formatSea(Measurement),
           IGHV = ifelse(is.na(IGHV), "unknown", 
                         ifelse(IGHV == 1, "mutated", "unmutated"))) %>%
    mutate(IGHV = factor(IGHV, levels = c("mutated","unmutated","unknown"))) %>%
    filter(!is.na(value), !is.na(doubling.time))
  
  #p value for annotation
  pval <- filter(corRes, Measurement == seaName)$p
  coef <- filter(corRes, Measurement == seaName)$coef
  
  #color for IGHV status
  colorList <- c(mutated = "red", unmutated = "black", unknown = "grey80")
  
  #formatted seaname
  seaTitle <- unique(plotTab$Measurement)
  
  
  #prepare correlation test annotations
  annoText <- paste("'coefficient ='~",format(coef,digits = 2),"*","', p ='~",sciPretty(pval,digits=2))
  
  ggplot(plotTab, aes(x=value,y=doubling.time)) + 
    geom_point(size=1, aes(col = IGHV)) + 
    geom_smooth(method = "lm", se= FALSE) +
    xlab("ECAR (pMol/min)") + ylab("Lymphocyte doubling time (days)") +
    theme_bw() + ggtitle(seaTitle) +
    scale_color_manual(values = colorList) +
    annotate("text", x = -Inf, y = Inf, label = annoText, 
             vjust=1, hjust=0, size = 5, parse = TRUE, col= "darkred") +
    theme(panel.grid = element_blank(),
          axis.title = element_text(size=13, face="bold"),
          axis.text = element_text(size=12),
          plot.title = element_text(face="bold", hjust=0.5, size = 15 ),
          legend.position = "none",
          axis.title.x = element_text(face="bold"),
          plot.margin = unit(c(0.5,0.5,0.5,0.5), "cm"))
    
})

Show the plot

grid.arrange(grobs = plotList.cor, ncol =2)

Combine and save the plot

plot_grid(plotList[[1]], plotList[[2]],
          plotList.cor[[1]], plotList.cor[[2]],labels = "AUTO")

6.3 Correlation between seahorse measurement and survival

6.3.1 Prepare dataset

seaTable <- assays(seaCombat)$seaMedian

survT = patmeta[colnames(seaTable),]
survT[which(survT[,"IGHV"]=="U") ,"IGHV"] = 0
survT[which(survT[,"IGHV"]=="M") ,"IGHV"] = 1
survT$IGHV = as.numeric(survT$IGHV)

colnames(survT) = gsub("Age4Main", "age", colnames(survT))

#add seahorse measurement information
survT <- cbind(survT, t(seaTable))

# competinting risk endpoint fpr 
survT$compE <- ifelse(survT$treatedAfter == TRUE, 1, 0)
survT$compE <- ifelse(survT$treatedAfter == FALSE & survT$died==TRUE,
                      2, survT$compE )
survT$T7  <- ifelse(survT$compE == 1, survT$T5, survT$T6 )

6.3.2 Univariate survival analysis

6.3.2.1 Caclulation correlations

Function to calculate correlations

com <- function( Time, endpoint, scaleX, sub, d, split, drug_names) {  
  
  res <- lapply(d, function(g)  { 
  
  #drug <- survT[,g] * scaleX
  drug <- survT[,g]
  #drug <- (drug - mean(drug, na.rm = TRUE))/sd(drug, na.rm=TRUE)
  ## all=99, M-CLL=1, U-CLL=0
  if(sub==99) { surv <- coxph(Surv(survT[,paste0(Time)],
                                   survT[,paste0(endpoint)] == TRUE) ~ drug)} 
  if(sub<99)  { surv <- coxph(Surv(survT[,paste0(Time)],
                                   survT[,paste0(endpoint)] == TRUE) ~ drug,
                              subset=survT[,paste0(split)]==sub)}    
  
  c(summary(surv)[[7]][,5], summary(surv)[[7]][,2], 
                summary(surv)[[8]][,3], 
                summary(surv)[[8]][,4])
 })
 s <- do.call(rbind, res)
 colnames(s) <- c("p", "HR", "lower", "higher")
 rownames(s) <- drug_names
 s
}

All samples

d <- rownames(seaTable)
sea_names <- formatSea(d)

ttt <- com(Time="T5", endpoint="treatedAfter", sub=99, d=d,
          split="IGHV", drug_names=sea_names, scaleX=1)

   
os <-  com(Time="T6", endpoint="died", sub=99, d=d, split="",
          drug_names=sea_names, scaleX=1)

#TTT result
ttt
##                                     p        HR      lower   higher
## basal respiration          0.78487361 1.0045508 0.97232990 1.037839
## ATP production             0.61856940 1.0089384 0.97420635 1.044909
## proton leak                0.79523162 0.9941888 0.95137805 1.038926
## maximal respiration        0.04329390 1.0083468 1.00025052 1.016509
## spare respiratory capacity 0.02330588 1.0109372 1.00148009 1.020484
## glycolysis                 0.43103151 1.0277871 0.96000913 1.100350
## glycolytic capacity        0.05305026 1.0258682 0.99966404 1.052759
## glycolytic reserve         0.03378313 1.0358886 1.00270606 1.070169
## ECAR/OCR                   0.17679455 0.2350961 0.02876541 1.921411
## OCR                        0.10457119 1.0250543 0.99487711 1.056147
## ECAR                       0.57398172 0.9790316 0.90930921 1.054100
#OS result
os
##                                      p        HR      lower    higher
## basal respiration          0.445743838 1.0209332 0.96799051  1.076772
## ATP production             0.054477450 1.0627155 0.99883212  1.130685
## proton leak                0.230798917 0.9585454 0.89441307  1.027276
## maximal respiration        0.241401237 1.0082235 0.99450296  1.022133
## spare respiratory capacity 0.260001165 1.0093938 0.99310455  1.025950
## glycolysis                 0.153194519 1.0908935 0.96813843  1.229213
## glycolytic capacity        0.005552633 1.0728828 1.02084207  1.127576
## glycolytic reserve         0.003918966 1.0943347 1.02931759  1.163459
## ECAR/OCR                   0.867915535 1.3239198 0.04849462 36.143461
## OCR                        0.727618843 1.0086884 0.96076018  1.059008
## ECAR                       0.623678252 1.0313524 0.91169617  1.166713

M-CLL samples

d <- rownames(seaTable)
sea_names <- formatSea(d)

ttt.M <- com(Time="T5", endpoint="treatedAfter", sub=1, d=d,
          split="IGHV", drug_names=sea_names, scaleX=1)

   
os.M <-  com(Time="T6", endpoint="died", sub=1, d=d, 
           split="IGHV",drug_names=sea_names, scaleX=1)

#TTT result
ttt.M
##                                     p         HR        lower   higher
## basal respiration          0.66972710 1.01302726 0.9545326975 1.075106
## ATP production             0.26823880 1.03545664 0.9735172775 1.101337
## proton leak                0.39840912 0.96862771 0.8995549818 1.043004
## maximal respiration        0.53665512 1.00492687 0.9893814783 1.020717
## spare respiratory capacity 0.57126071 1.00514690 0.9874422205 1.023169
## glycolysis                 0.19675546 0.91814324 0.8064834965 1.045263
## glycolytic capacity        0.83294236 0.99483645 0.9481134323 1.043862
## glycolytic reserve         0.65507343 1.01398165 0.9540555009 1.077672
## ECAR/OCR                   0.08563539 0.02940216 0.0005271614 1.639891
## OCR                        0.28309251 1.03189088 0.9744044931 1.092769
## ECAR                       0.23953728 0.92290950 0.8074193715 1.054919
#OS result
os.M
##                                     p        HR      lower       higher
## basal respiration          0.30412487 0.9499771 0.86140242    1.0476596
## ATP production             0.27513686 1.0632632 0.95234566    1.1870990
## proton leak                0.01414948 0.8720443 0.78169459    0.9728369
## maximal respiration        0.34145002 0.9847134 0.95395111    1.0164676
## spare respiratory capacity 0.46372706 0.9870185 0.95311644    1.0221264
## glycolysis                 0.95777589 1.0058566 0.81031426    1.2485865
## glycolytic capacity        0.13875994 1.0781006 0.97593525    1.1909610
## glycolytic reserve         0.06252895 1.1162450 0.99426344    1.2531918
## ECAR/OCR                   0.46124679 7.5627436 0.03477659 1644.6435373
## OCR                        0.29729407 0.9364032 0.82755839    1.0595638
## ECAR                       0.97528422 1.0035431 0.80235073    1.2551851

U-CLL samples

d <- rownames(seaTable)
sea_names <- formatSea(d)

ttt.U <- com(Time="T5", endpoint="treatedAfter", sub=0, d=d,
          split="IGHV", drug_names=sea_names, scaleX=1)

   
os.U <-  com(Time="T6", endpoint="died", sub=0, d=d, 
           split="IGHV",drug_names=sea_names, scaleX=1)

#TTT result
ttt.U
##                                     p        HR      lower    higher
## basal respiration          0.57970659 0.9884185 0.94849803  1.030019
## ATP production             0.45865707 0.9827452 0.93853098  1.029042
## proton leak                0.86897345 1.0054747 0.94232066  1.072861
## maximal respiration        0.27177986 1.0054096 0.99578218  1.015130
## spare respiratory capacity 0.14010915 1.0085995 0.99719158  1.020138
## glycolysis                 0.38446733 1.0468223 0.94424958  1.160537
## glycolytic capacity        0.06412587 1.0360918 0.99792112  1.075723
## glycolytic reserve         0.04355106 1.0517270 1.00146086  1.104516
## ECAR/OCR                   0.63296402 0.4761677 0.02265924 10.006323
## OCR                        0.51066854 1.0123061 0.97607840  1.049878
## ECAR                       0.69072636 0.9805743 0.89025029  1.080062
#OS result
os.U
##                                     p        HR       lower    higher
## basal respiration          0.35128211 1.0296727 0.968269501  1.094970
## ATP production             0.40353629 1.0318067 0.958719943  1.110465
## proton leak                0.73041348 1.0175484 0.921682426  1.123386
## maximal respiration        0.25523712 1.0084698 0.993923097  1.023229
## spare respiratory capacity 0.28477364 1.0095579 0.992113741  1.027309
## glycolysis                 0.30623093 1.0918435 0.922700156  1.291993
## glycolytic capacity        0.06146802 1.0659453 0.996936433  1.139731
## glycolytic reserve         0.05659909 1.0861820 0.997679695  1.182535
## ECAR/OCR                   0.95731501 0.8838535 0.009613555 81.259948
## OCR                        0.82813436 1.0059134 0.953769322  1.060908
## ECAR                       0.74229021 1.0253082 0.883397069  1.190016

6.3.2.2 KM plot

Function for km plot

#need to be fixed
km <- function(survT, seaName, split, titlePlot, t, hr, c, pvals = NULL) { 
  #function for km plot
  survS <- survT
    #filter_(survT, sprintf("!is.na(%s)",split),
    #              sprintf("!is.na(%s)",seaName))
  k <- survS[ , seaName]
  
  ms5 <- maxstat.test(Surv(T5, treatedAfter)  ~ k, 
                             data = survS,
                             smethod = "LogRank",
                             minprop = 0.2, 
                             maxprop = 0.8, 
                             alpha = NULL)
  ms6 <- maxstat.test(Surv(T6, died) ~ k, 
                             data = survS,
                             smethod = "LogRank",
                             minprop = 0.2, 
                             maxprop = 0.8, 
                             alpha = NULL)

  med <- median(k, na.rm = TRUE)
  
  # median
  if (c=="med") {    
   survS$cutA <- ifelse(k >= med, "high", "low")
   survS$cutM <- ifelse(k >= med, "high", "low")
   survS$cutU <- ifelse(k >= med, "high", "low")
  }
  
  #maxstat & TTT
  if (c=="maxstat" & t=="TTT") {    
   survS$cutA <- ifelse(k >= ms5$estimate, "high", "low")
   survS$cutM <- ifelse(k >= ms5$estimate, "high", "low") 
   survS$cutU <- ifelse(k >= ms5$estimate, "high", "low")
  }
  
  #OS & maxstat
  if (c=="maxstat" & t=="OS") {    
   survS$cutA <- ifelse(k >= ms6$estimate, "high", "low")
   survS$cutM <- ifelse(k >= ms6$estimate, "high", "low") 
   survS$cutU <- ifelse(k >= ms6$estimate, "high", "low")
  }
  
  p <- list()  #list for storing plots
  
  #prepare p value annotations
  if (is.null(pvals)){
    pvalList <- rep(FALSE,3)
  } else {
    pvalList <- pvals
  }
  
  #subset according to genetic factor
  survM <- survS[survS[,split] %in% 1,]
  survU <- survS[survS[,split] %in% 0,]
  inputName <- formatSea(seaName)
  if (t=="TTT") {
         yl <- "Fraction w/o treatment"
         #if (c=="med"){ cat(sprintf("%s median-cutpoint for TTT: %5.2f\n", inputName, median(k) ) ) } else 
         #  { cat(sprintf("%s cutpoint for TTT: %5.2g\n", inputName, ms5$estimate )) }
         p[[1]] <- ggsurvplot(survfit(Surv(T5, treatedAfter) ~ cutA, data = survS), 
                              data = survS, pval = pvalList[1],
                              ylab = yl, xlab = "Time (years)", title = inputName,
                              legend.labs = c("high","low"), palette = c("red","blue"),
                              ggtheme = theme(plot.title = element_text(vjust =0.5, size = 18),
                                              axis.text = element_text(size= 15)))$plot
         p[[2]] <- ggsurvplot(survfit(Surv(T5, treatedAfter) ~ cutM, data = survM),
                              data = survS, pval = pvalList[2],
                              ylab = yl, xlab = "Time (years)",
                              title = paste(inputName, titlePlot[1], titlePlot[3]),
                              legend.labs = c("high","low"), palette = c("red","blue"),
                              ggtheme = theme(plot.title = element_text(vjust =0.5, size = 18),
                                              axis.text = element_text(size= 15)))$plot
         p[[3]] <- ggsurvplot(survfit(Surv(T5, treatedAfter) ~ cutU, data = survU),
                              data = survS, pval = pvalList[3],
                              ylab = yl, xlab = "Time (years)", 
                              title = paste(inputName, titlePlot[1], titlePlot[2]),
                              legend.labs = c("high","low"), palette = c("red","blue"),
                              ggtheme = theme(plot.title = element_text(vjust =0.5, size = 18),
                                              axis.text = element_text(size= 15)))$plot
         }
   # OS  
   else {
         yl <- "Fraction overall survival"
         #if (c=="med"){ cat(sprintf("%s median-cutpoint for OS: %5.2f\n", inputName, median(k) ) ) } else 
         #  { cat(sprintf("%s cutpoint for OS: %5.2f\n", inputName, ms6$estimate )) }
         p[[1]] <- ggsurvplot(survfit(Surv(T5, died) ~ cutA, data = survS), data = survS,
                              ylab = yl, xlab = "Time (years)", pval = pvalList[1],
                              title = inputName, legend.labs = c("high","low"), palette = c("red","blue"),
                              ggtheme = theme(plot.title = element_text(vjust =0.5, size = 18),
                                              axis.text = element_text(size= 15)))$plot
         p[[2]] <- ggsurvplot(survfit(Surv(T5, died) ~ cutM, data = survM), data = survM,
                              ylab = yl, xlab = "Time (years)", pval = pvalList[2],
                              title = paste(inputName, titlePlot[1], titlePlot[3]), 
                              legend.labs = c("high","low"), palette = c("red","blue"),
                              ggtheme = theme(plot.title = element_text(vjust =0.5, size = 18),
                                              axis.text = element_text(size= 15)))$plot 
         p[[3]] <- ggsurvplot(survfit(Surv(T5, died) ~ cutU, data = survU), data = survU,
                              ylab = yl, xlab = "Time (years)", pval = pvalList[3],
                              title = paste(inputName, titlePlot[1], titlePlot[2]), 
                              legend.labs = c("high","low"), palette = c("red","blue"),
                              ggtheme = theme(plot.title = element_text(vjust =0.5, size = 18),
                                              axis.text = element_text(size= 15)))$plot 
   }
  p
}

6.3.2.3 Using maxstat

Time to next treatment (maxstat).

TTTplot.maxstat <- list()

for (seaName in rownames(seaCombat)) {
  seaName.format <- formatSea(seaName)
  pvals <- c(ttt[seaName.format ,"p"], ttt.M[seaName.format ,"p"], ttt.U[seaName.format ,"p"])
  pvals <- sapply(pvals, function(x) {
    sprintf("p value = %1.2f",x)
  })
  TTTplot.maxstat[[seaName]] <- km(survT, seaName = seaName, split = "IGHV", t="TTT",
   titlePlot =c("(IGHV", "umutated)", "mutated)"),  hr="tr", c="maxstat", pvals = pvals)
}

Overall survival (maxstat).

OSplot.maxstat <- list()

for (seaName in rownames(seaCombat)) {
  seaName.format <- formatSea(seaName)
  pvals <- c(os[seaName.format ,"p"], os.M[seaName.format ,"p"], os.U[seaName.format ,"p"])
  pvals <- sapply(pvals, function(x) {
    sprintf("p value = %1.2f",x)
  })
  OSplot.maxstat[[seaName]] <- km(survT, seaName = seaName, split = "IGHV", t="OS",
   titlePlot =c("(IGHV", "umutated)", "mutated)"),  hr="tr", c="maxstat", pvals = pvals)
}

KM plot for all samples (not shown in manuscript)

plot_grid(TTTplot.maxstat$glycolytic.reserve[[1]], TTTplot.maxstat$maximal.respiration[[1]],
          TTTplot.maxstat$spare.respiratory.capacity[[1]],
          OSplot.maxstat$glycolytic.capacity[[1]], OSplot.maxstat$glycolytic.reserve[[1]],
          labels = "AUTO")

KM plot for IGHV stratified samples (not shown in manuscript)

plot_grid(TTTplot.maxstat$glycolytic.reserve[[2]], TTTplot.maxstat$glycolytic.reserve[[3]],
          TTTplot.maxstat$maximal.respiration[[2]], TTTplot.maxstat$maximal.respiration[[3]],
          TTTplot.maxstat$spare.respiratory.capacity[[2]], TTTplot.maxstat$spare.respiratory.capacity[[3]],
          OSplot.maxstat$glycolytic.capacity[[2]], OSplot.maxstat$glycolytic.capacity[[3]],
          OSplot.maxstat$glycolytic.reserve[[2]], OSplot.maxstat$glycolytic.reserve[[3]],
          labels = "AUTO", ncol = 2)

6.3.2.4 Using median

Time to next treatment (median).

TTTplot.med <- list()

for (seaName in rownames(seaCombat)) {
  seaName.format <- formatSea(seaName)
  pvals <- c(ttt[seaName.format ,"p"], ttt.M[seaName.format ,"p"], ttt.U[seaName.format ,"p"])
  pvals <- sapply(pvals, function(x) {
    sprintf("p value = %1.2f",x)
  })
  TTTplot.med[[seaName]] <- km(survT, seaName = seaName, split = "IGHV", t="TTT",
   titlePlot =c("(IGHV", "umutated)", "mutated)"),  hr="tr", c="med", pvals = pvals)
}

Overall survival (median).

OSplot.med <- list()

for (seaName in rownames(seaCombat)) {
  seaName.format <- formatSea(seaName)
  pvals <- c(os[seaName.format ,"p"], os.M[seaName.format ,"p"], os.U[seaName.format ,"p"])
  pvals <- sapply(pvals, function(x) {
    sprintf("p value = %1.2f",x)
  })
  OSplot.med[[seaName]] <- km(survT, seaName = seaName, split = "IGHV", t="OS",
   titlePlot =c("(IGHV", "umutated)", "mutated)"),  hr="tr", c="med", pvals = pvals)
}

KM plot for all samples (Figure 6)

plot_grid(TTTplot.med$glycolytic.reserve[[1]], TTTplot.med$maximal.respiration[[1]],
          TTTplot.med$spare.respiratory.capacity[[1]],
          OSplot.med$glycolytic.capacity[[1]], OSplot.med$glycolytic.reserve[[1]],
          labels = "AUTO")

KM plot for IGHV stratified samples (not shown in manuscript)

plot_grid(TTTplot.med$glycolytic.reserve[[2]], TTTplot.med$glycolytic.reserve[[3]],
          TTTplot.med$maximal.respiration[[2]], TTTplot.med$maximal.respiration[[3]],
          TTTplot.med$spare.respiratory.capacity[[2]], TTTplot.med$spare.respiratory.capacity[[3]],
          OSplot.med$glycolytic.capacity[[2]], OSplot.med$glycolytic.capacity[[3]],
          OSplot.med$glycolytic.reserve[[2]], OSplot.med$glycolytic.reserve[[3]],
          labels = "AUTO", ncol = 2)

6.4 Multi-variate cox model

6.4.1 Prepare dataset

seaTable <- assays(seaCombat)$seaMedian

survT = patmeta[colnames(seaTable),]
survT[which(survT[,"IGHV"]=="U") ,"IGHV"] = 0
survT[which(survT[,"IGHV"]=="M") ,"IGHV"] = 1
survT$IGHV = as.numeric(survT$IGHV)

colnames(survT) = gsub("Age4Main", "age", colnames(survT))

#add seahorse measurement information

survT <- cbind(survT, t(mscale(seaTable)))

# competinting risk endpoint fpr 
survT$compE <- ifelse(survT$treatedAfter == TRUE, 1, 0)
survT$compE <- ifelse(survT$treatedAfter == FALSE & survT$died==TRUE,
                      2, survT$compE )
survT$T7  <- ifelse(survT$compE == 1, survT$T5, survT$T6 )

#genetic variants
survT$SF3B1      <- exprs(lpdAll)[ "SF3B1",      rownames(survT)  ]
survT$NOTCH1     <- exprs(lpdAll)[ "NOTCH1",     rownames(survT)  ]
survT$BRAF       <- exprs(lpdAll)[ "BRAF",       rownames(survT)  ]
survT$TP53       <- exprs(lpdAll)[ "TP53",       rownames(survT)  ]
survT$del17p13   <- exprs(lpdAll)[ "del17p13",   rownames(survT)  ]
survT$del11q22.3 <- exprs(lpdAll)[ "del11q22.3", rownames(survT)  ]
survT$trisomy12 <-  exprs(lpdAll)[ "trisomy12", rownames(survT)  ]
extractSome <- function(x) {
  sumsu <- summary(x)
  data.frame(
    `p-value`      = 
      sprintf("%6.3g", sumsu[["coefficients"]][, "Pr(>|z|)"]),
    `HR`           = 
      sprintf("%6.3g", signif( sumsu[["coefficients"]][, "exp(coef)"], 2) ), 
    `lower 95% CI` = 
      sprintf("%6.3g", signif( sumsu[["conf.int"]][, "lower .95"], 2) ),
    `upper 95% CI` = 
      sprintf("%6.3g", signif( sumsu[["conf.int"]][, "upper .95"], 2),
              check.names = FALSE) )
}

Define covariates and effects.

6.4.1.1 TTT

glycolytic reserve

surv1 <- coxph(
  Surv(T5, treatedAfter) ~  
    age +
    as.factor(IC50beforeTreatment) +
    as.factor(trisomy12) +
    as.factor(del11q22.3) +
    as.factor(del17p13) +
    as.factor(TP53) +
    IGHVwt +
    glycolytic.reserve,       # continuous
  data = survT )

colFactor <- data.frame(factor = c("age", "pretreatment", 
                                   "trisomy12", "del11q22.3", 
                                   "del17p13","TP53","U-CLL",
                                   "glycolytic reserve"))

outTab <- cbind(colFactor,extractSome(surv1))

write(print(xtable(outTab,
            caption = "Multivariate Cox regression model for time to treatment with glycolytic reserve as a covariate"), 
      include.rownames=FALSE,
      caption.placement = "top"), file = "section05/glyRes_TTT.tex")
## % latex table generated in R 3.5.0 by xtable 1.8-2 package
## % Thu May 17 16:26:29 2018
## \begin{table}[ht]
## \centering
## \caption{Multivariate Cox regression model for time to treatment with glycolytic reserve as a covariate} 
## \begin{tabular}{lllll}
##   \hline
## factor & p.value & HR & lower.95..CI & upper.95..CI \\ 
##   \hline
## age & 0.0744 &   0.81 &   0.64 &      1 \\ 
##   pretreatment & 2.3e-05 &   0.21 &    0.1 &   0.43 \\ 
##   trisomy12 &   0.32 &    1.6 &   0.64 &    3.8 \\ 
##   del11q22.3 &  0.679 &   0.83 &   0.36 &      2 \\ 
##   del17p13 &  0.658 &    1.2 &   0.52 &    2.9 \\ 
##   TP53 &  0.123 &    1.9 &   0.84 &    4.1 \\ 
##   U-CLL & 0.0863 &    1.8 &   0.92 &    3.7 \\ 
##   glycolytic reserve &  0.413 &    1.2 &   0.76 &    1.9 \\ 
##    \hline
## \end{tabular}
## \end{table}
cat(sprintf("%s patients considerd in the model; number of events %1g\n", 
            summary(surv1)$n, summary(surv1)[6] ) )
## 89 patients considerd in the model; number of events 47

maximal respiration

surv1 <- coxph(
  Surv(T5, treatedAfter) ~  
    age +
    as.factor(IC50beforeTreatment) +
    as.factor(trisomy12) +
    as.factor(del11q22.3) +
    as.factor(del17p13) +
    as.factor(TP53) +
    IGHVwt +
    maximal.respiration,       # continuous
  data = survT )

colFactor <- data.frame(factor = c("age", "pretreatment", 
                                   "trisomy12", "del11q22.3", 
                                   "del17p13","TP53","U-CLL",
                                   "maximal respiration"))

outTab <- cbind(colFactor,extractSome(surv1))

write(print(xtable(outTab,
            caption = "Multivariate Cox regression model for time to treatment with maximal respiration as a covariate"), 
      include.rownames=FALSE,
      caption.placement = "top"), file = "section05/maxRes_TTT.tex")
## % latex table generated in R 3.5.0 by xtable 1.8-2 package
## % Thu May 17 16:26:29 2018
## \begin{table}[ht]
## \centering
## \caption{Multivariate Cox regression model for time to treatment with maximal respiration as a covariate} 
## \begin{tabular}{lllll}
##   \hline
## factor & p.value & HR & lower.95..CI & upper.95..CI \\ 
##   \hline
## age & 0.0183 &   0.78 &   0.63 &   0.96 \\ 
##   pretreatment & 4.13e-05 &   0.25 &   0.13 &   0.48 \\ 
##   trisomy12 &  0.115 &      2 &   0.85 &    4.5 \\ 
##   del11q22.3 &  0.604 &    1.2 &   0.56 &    2.7 \\ 
##   del17p13 &  0.918 &   0.96 &    0.4 &    2.3 \\ 
##   TP53 &  0.101 &      2 &   0.87 &    4.5 \\ 
##   U-CLL & 0.0276 &    2.1 &    1.1 &    3.9 \\ 
##   maximal respiration & 0.0562 &    1.4 &   0.99 &      2 \\ 
##    \hline
## \end{tabular}
## \end{table}
cat(sprintf("%s patients considerd in the model; number of events %1g\n", 
            summary(surv1)$n, summary(surv1)[6] ) )
## 102 patients considerd in the model; number of events 55

spare respiratory capacity

surv1 <- coxph(
  Surv(T5, treatedAfter) ~  
    age +
    as.factor(IC50beforeTreatment) +
    as.factor(trisomy12) +
    as.factor(del11q22.3) +
    as.factor(del17p13) +
    as.factor(TP53) +
    IGHVwt +
    spare.respiratory.capacity,       # continuous
  data = survT )

colFactor <- data.frame(factor = c("age", "pretreatment", 
                                   "trisomy12", "del11q22.3", 
                                   "del17p13","TP53","U-CLL",
                                   "spare.respiratory.capacity"))

outTab <- cbind(colFactor,extractSome(surv1))

write(print(xtable(outTab,
            caption = "Multivariate Cox regression model for time to treatment with glycolytic reserve as a covariate"), 
      include.rownames=FALSE,
      caption.placement = "top"), file = "section05/spResCap_TTT.tex")
## % latex table generated in R 3.5.0 by xtable 1.8-2 package
## % Thu May 17 16:26:29 2018
## \begin{table}[ht]
## \centering
## \caption{Multivariate Cox regression model for time to treatment with glycolytic reserve as a covariate} 
## \begin{tabular}{lllll}
##   \hline
## factor & p.value & HR & lower.95..CI & upper.95..CI \\ 
##   \hline
## age & 0.0183 &   0.78 &   0.63 &   0.96 \\ 
##   pretreatment & 4.06e-05 &   0.25 &   0.13 &   0.48 \\ 
##   trisomy12 &  0.109 &      2 &   0.86 &    4.5 \\ 
##   del11q22.3 &  0.631 &    1.2 &   0.55 &    2.7 \\ 
##   del17p13 &  0.911 &   0.95 &    0.4 &    2.3 \\ 
##   TP53 &  0.114 &    1.9 &   0.85 &    4.4 \\ 
##   U-CLL & 0.0256 &    2.1 &    1.1 &      4 \\ 
##   spare.respiratory.capacity & 0.0505 &    1.4 &      1 &      2 \\ 
##    \hline
## \end{tabular}
## \end{table}
cat(sprintf("%s patients considerd in the model; number of events %1g\n", 
            summary(surv1)$n, summary(surv1)[6] ) )
## 102 patients considerd in the model; number of events 55

6.4.2 OS

glycolytic reserve

surv1 <- coxph(
  Surv(T6, died) ~  
    age +
    as.factor(IC50beforeTreatment) +
    as.factor(trisomy12) +
    as.factor(del11q22.3) +
    as.factor(del17p13) +
    as.factor(TP53) +
    IGHVwt +
    glycolytic.reserve,       # continuous
  data = survT )

colFactor <- data.frame(factor = c("age", "pretreatment", 
                                   "trisomy12", "del11q22.3", 
                                   "del17p13","TP53","U-CLL",
                                   "glycolytic reserve"))

outTab <- cbind(colFactor,extractSome(surv1))

write(print(xtable(outTab,
            caption = "Multivariate Cox regression model for overall survival with glycolytic reserve as a covariate"), 
      include.rownames=FALSE,
      caption.placement = "top"), file = "section05/glyRes_OS.tex")
## % latex table generated in R 3.5.0 by xtable 1.8-2 package
## % Thu May 17 16:26:29 2018
## \begin{table}[ht]
## \centering
## \caption{Multivariate Cox regression model for overall survival with glycolytic reserve as a covariate} 
## \begin{tabular}{lllll}
##   \hline
## factor & p.value & HR & lower.95..CI & upper.95..CI \\ 
##   \hline
## age &   0.69 &    1.1 &   0.74 &    1.6 \\ 
##   pretreatment & 0.0013 &  0.087 &   0.02 &   0.39 \\ 
##   trisomy12 & 0.0663 &    4.6 &    0.9 &     24 \\ 
##   del11q22.3 &  0.322 &   0.49 &   0.12 &      2 \\ 
##   del17p13 &  0.761 &   0.79 &   0.17 &    3.7 \\ 
##   TP53 &  0.685 &    1.3 &   0.36 &    4.8 \\ 
##   U-CLL & 0.0817 &    3.1 &   0.87 &     11 \\ 
##   glycolytic reserve &   0.28 &    1.5 &   0.72 &    3.1 \\ 
##    \hline
## \end{tabular}
## \end{table}
cat(sprintf("%s patients considerd in the model; number of events %1g\n", 
            summary(surv1)$n, summary(surv1)[6] ) )
## 95 patients considerd in the model; number of events 16

glycolytic capacity

surv1 <- coxph(
  Surv(T6, died) ~  
    age +
    as.factor(IC50beforeTreatment) +
    as.factor(trisomy12) +
    as.factor(del11q22.3) +
    as.factor(del17p13) +
    as.factor(TP53) +
    IGHVwt +
    glycolytic.capacity,       # continuous
  data = survT )

colFactor <- data.frame(factor = c("age", "pretreatment", 
                                   "trisomy12", "del11q22.3", 
                                   "del17p13","TP53","U-CLL",
                                   "glycolytic capacity"))

outTab <- cbind(colFactor,extractSome(surv1))

write(print(xtable(outTab,
            caption = "Multivariate Cox regression model for overall survival with glycolytic capacity as a covariate"), 
      include.rownames=FALSE,
      caption.placement = "top"), file = "section05/glyCap_OS.tex")
## % latex table generated in R 3.5.0 by xtable 1.8-2 package
## % Thu May 17 16:26:29 2018
## \begin{table}[ht]
## \centering
## \caption{Multivariate Cox regression model for overall survival with glycolytic capacity as a covariate} 
## \begin{tabular}{lllll}
##   \hline
## factor & p.value & HR & lower.95..CI & upper.95..CI \\ 
##   \hline
## age &  0.738 &    1.1 &   0.73 &    1.6 \\ 
##   pretreatment & 0.00114 &  0.082 &  0.018 &   0.37 \\ 
##   trisomy12 & 0.0687 &    4.5 &   0.89 &     23 \\ 
##   del11q22.3 &  0.251 &   0.44 &   0.11 &    1.8 \\ 
##   del17p13 &  0.676 &   0.72 &   0.15 &    3.4 \\ 
##   TP53 &  0.679 &    1.3 &   0.35 &    4.9 \\ 
##   U-CLL & 0.0871 &    3.1 &   0.85 &     11 \\ 
##   glycolytic capacity &  0.383 &    1.4 &   0.64 &    3.2 \\ 
##    \hline
## \end{tabular}
## \end{table}
cat(sprintf("%s patients considerd in the model; number of events %1g\n", 
            summary(surv1)$n, summary(surv1)[6] ) )
## 95 patients considerd in the model; number of events 16

7 Multivairate analysis explaining heterogeneity in Seahorse data

7.1 Building multi-variate models that predict CLL bioenergetic features

Loading the data.

data(list=c("conctab", "drpar", "drugs", "patmeta", "lpdAll", "dds", "mutCOM",
"methData","seaCombat","pretreat"))

7.1.1 Data pre-processing

For gene expression and methylation data

#only consider CLL patients
CLLPatients<-rownames(patmeta)[which(patmeta$Diagnosis=="CLL")]

e<-dds
colnames(e)<-colData(e)$PatID

#Methylation Data
methData = t(assay(methData))
methPCA <- prcomp(methData, center = T, scale. = TRUE)$x[,1:20]

#RNA Data
eCLL<-e[,colnames(e) %in% CLLPatients]

#filter out genes without gene name
eCLL<-eCLL[(!rowData(eCLL)$symbol %in% c("",NA)),]

#filter out low count genes
###
minrs <- 100
rs  <- rowSums(assay(eCLL))
eCLL<-eCLL[ rs >= minrs, ]

#variance stabilize the data
vstCounts<-varianceStabilizingTransformation(eCLL)
vstCounts<-assay(vstCounts)

#filter out low variable genes
ntop<-5000
vstCountsFiltered<-vstCounts[order(apply(vstCounts, 1, var, na.rm=T),
                                   decreasing = T)[1:ntop],]
eData<-t(vstCountsFiltered)

#Prepare PCA
pcRes <- prcomp(eData, center = T, scale. = TRUE)
rnaPCA <- pcRes$x[,1:20]
pcLoad <- pcRes$rotation[,1:20]

For genetic data

#genetics
mutCOMbinary<-channel(mutCOM, "binary")
mutCOMbinary<-mutCOMbinary[featureNames(mutCOMbinary) %in% colnames(seaCombat),]
genData<-exprs(mutCOMbinary)
idx <- which(colnames(genData) %in% c("del13q14_bi", "del13q14_mono"))
genData <- genData[,-idx]
colnames(genData)[which(colnames(genData)=="del13q14_any")] = "del13q14"

#remove gene with higher than 20% missing values
genData <- genData[,colSums(is.na(genData))/nrow(genData) <= 0.2]

#fill the missing value with majority
genData <- apply(genData, 2, function(x) {
  xVec <- x
  avgVal <- mean(x,na.rm= TRUE)
  if (avgVal >= 0.5) {
    xVec[is.na(xVec)] <- 1
  } else xVec[is.na(xVec)] <- 0
  xVec
})

For IGHV and methylation cluster

#IGHV
translation <- c(`U` = 0, `M` = 1)
stopifnot(all(patmeta$IGHV %in% c("U","M", NA)))
IGHVData <- matrix(translation[patmeta$IGHV], 
                   dimnames = list(rownames(patmeta), "IGHV"), ncol = 1)
IGHVData<-IGHVData[rownames(IGHVData) %in% CLLPatients,,drop=F]
#remove patiente with NA IGHV status
IGHVData<-IGHVData[!is.na(IGHVData), ,drop=F]

# Methylation cluster
translation <- c(`HP` = 2, `IP` = 1, `LP` = 0)
Mcluster <- matrix(translation[patmeta$ConsClust],
                   dimnames = list(rownames(patmeta), "ConsCluster"), ncol = 1)
Mcluster <- Mcluster[rownames(Mcluster) %in% CLLPatients,,drop=F]
Mcluster <- Mcluster[!is.na(Mcluster), ,drop=F]

For demographic and clinical data

#demographics (age and sex)
patmeta<-subset(patmeta, Diagnosis=="CLL")
gender <- ifelse(patmeta[,"Gender"]=="m",0,1)


# impute missing values in age by mean
ImputeByMean <- function(x) {x[is.na(x)] <- mean(x, na.rm=TRUE); return(x)}
age<-ImputeByMean(patmeta[,"Age4Main"])


demogrData <- cbind(age=age,gender=gender)
rownames(demogrData) <- rownames(patmeta)

#Pretreatment
pretreated<- pretreat

For drug response data

#Remove bad drugs. Bortezomib lost its activity during storage. The data for this drug and NSC 74859 were discarded from further analysis.
badrugs = c("D_008", "D_025") 
lpdCLL <- lpdAll[!fData(lpdAll)$id %in% badrugs, pData(lpdAll)$Diagnosis == "CLL"]

# get drug responsee data
get.drugresp <- function(lpd) {
  drugresp = t(exprs(lpd[fData(lpd)$type == 'viab'])) %>%
    tbl_df %>% dplyr::select(-ends_with(":5")) %>%
    dplyr::mutate(ID = colnames(lpd)) %>%
    tidyr::gather(drugconc, viab, -ID) %>%
    dplyr::mutate(drug = drugs[substring(drugconc, 1, 5), "name"],
           conc = sub("^D_([0-9]+_)", "", drugconc)) %>%
    dplyr::mutate(conc = as.integer(gsub("D_CHK_", "", conc)))
  
  drugresp
}
drugresp <- get.drugresp(lpdCLL)

#Use median polish to summarise drug response of the five concentrations
get.medp <- function(drugresp) {
  tab = drugresp %>% group_by(drug, conc) %>% 
    do(data.frame(v = .$viab, ID = .$ID)) %>% spread(ID, v)
  
  med.p = lapply(unique(tab$drug), function(n) {
    tb = filter(tab, drug == n) %>% ungroup() %>% 
      dplyr::select(-(drug:conc)) %>% 
      as.matrix %>% `rownames<-`(1:5)
    mdp = stats::medpolish(tb, trace.iter = FALSE)
    df = as.data.frame(mdp$col) + mdp$overall
    colnames(df) <- n
    df
  }) %>% bind_cols()
  
  
  
  medp.viab = tbl_df(med.p) %>% mutate(ID = colnames(tab)[3:ncol(tab)]) %>%
    gather(drug, viab, -ID) 
  medp.viab
}
drugresp.mp <- get.medp(drugresp)
viabData <- dcast(drugresp.mp, ID ~ drug, value.var = "viab" )
rownames(viabData) <- viabData$ID
viabData$ID <- NULL

Prepare seahorse meaurement (response vector)

sea <- t(assays(seaCombat)$seaMedian)

Function to Generate the explanatory dataset for each seahorse measurements

#function to generate response vector and explainatory variable for each seahorse measurement

generateData <- function(inclSet, onlyCombine = FALSE, censor = NULL, robust = FALSE) {
    
    dataScale <- function(x, censor = NULL, robust = FALSE) {
        #function to scale different variables
        if (length(unique(na.omit(x))) == 2){
          #a binary variable, change to -0.5 and 0.5 for 1 and 2
          x - 0.5
        } else if (length(unique(na.omit(x))) == 3) {
          #catagorical varialbe with 3 levels, methylation_cluster, change to -0.5,0,0.5
          (x - 1)/2
        } else {
          if (robust) {
          #continuous variable, centered by median and divied by 2*mad
          mScore <- (x-median(x,na.rm=TRUE))/(1.4826*mad(x,na.rm=TRUE))
            if (!is.null(censor)) {
              mScore[mScore > censor] <- censor
              mScore[mScore < -censor] <- -censor
            }
          mScore/2
          } else {
            mScore <- (x-mean(x,na.rm=TRUE))/(sd(x,na.rm=TRUE))
              if (!is.null(censor)) {
                mScore[mScore > censor] <- censor
                mScore[mScore < -censor] <- -censor
              }
          mScore/2
          }
        }
      }
    
    
    
    allResponse <- list()
    allExplain <- list()

    for (measure in colnames(inclSet$seahorse)) {
      y <- inclSet$seahorse[,measure]
      y <- y[!is.na(y)]
      
      #get overlapped samples for each dataset 
      overSample <- names(y)
      
      for (eachSet in inclSet) {
        overSample <- intersect(overSample,rownames(eachSet))
      }
      
      y <- dataScale(y[overSample], censor = censor, robust = robust)
      
      #generate explainatory variable table for each seahorse measurement
      expTab <- list()
      
      if ("drugs" %in% names(inclSet)) {
        viabTab <- inclSet$drugs[overSample,]
        vecName <- sprintf("drugs(%s)",ncol(viabTab))
        colnames(viabTab) <- paste0("con.",colnames(viabTab))
        expTab[[vecName]] <- apply(viabTab,2,dataScale,censor = censor, robust = robust)
      }
      
      if ("gen" %in% names(inclSet)) {
        geneTab <- inclSet$gen[overSample,]
        #at least 3 mutated sample
        geneTab <- geneTab[, colSums(geneTab) >= 3]
        vecName <- sprintf("genetic(%s)", ncol(geneTab))
        expTab[[vecName]] <- apply(geneTab,2,dataScale)
      }
      
      
      if ("RNA" %in% names(inclSet)){
        
        #for PCA
        rnaPCA <- inclSet$RNA[overSample, ]
        colnames(rnaPCA) <- paste0("con.expression",colnames(rnaPCA), sep = "")
        expTab[["expression(20)"]] <- apply(rnaPCA,2,dataScale, censor = censor, robust = robust)
        
      }
        
      
      if ("meth" %in% names(inclSet)){
        methPCA <- inclSet$meth[overSample,]
        colnames(methPCA) <- paste("con.methylation",colnames(methPCA),sep = "")
        expTab[["methylation(20)"]] <- apply(methPCA[,1:20],2,dataScale, censor = censor, robust = robust)
      }
      
      if ("IGHV" %in% names(inclSet)) {
        IGHVtab <- inclSet$IGHV[overSample,,drop=FALSE]
        expTab[["IGHV(1)"]] <- apply(IGHVtab,2,dataScale)
      }
      
      if ("Mcluster" %in% names(inclSet)) {
        methTab <- inclSet$Mcluster[overSample,,drop=FALSE]
        colnames(methTab) <- "methylation cluster"
        expTab[["methCluster(1)"]] <- apply(methTab,2,dataScale)
      }
      
      if ("demographics" %in% names(inclSet)){
        demoTab <- inclSet$demographics[overSample,]
        vecName <- sprintf("demographics(%s)", ncol(demoTab))
        expTab[[vecName]] <- apply(demoTab,2,dataScale)
      }
      
      if ("pretreated" %in% names(inclSet)){
        preTab <- inclSet$pretreated[overSample,,drop=FALSE]
        expTab[["pretreated(1)"]] <- apply(preTab,2,dataScale)
      }
      
      comboTab <- c()
      for (eachSet in names(expTab)){
        comboTab <- cbind(comboTab, expTab[[eachSet]])
      }
      vecName <- sprintf("all(%s)", ncol(comboTab))
      expTab[[vecName]] <- comboTab
      
      measureName <- sprintf("%s(%s)",formatSea(measure),length(y))
      allResponse[[measureName]] <- y
      allExplain[[measureName]] <- expTab
    }
  if (onlyCombine) {
    #only return combined results, for feature selection
    allExplain <- lapply(allExplain, function(x) x[length(x)])
  }
    
  return(list(allResponse=allResponse, allExplain=allExplain))

}

7.1.2 Calulate bioenergetic variance explained by multi-omics data set

7.1.2.1 Training models

Clean and integrate multi-omics data

inclSet<-list(RNA=rnaPCA, meth=methPCA, gen=genData, IGHV=IGHVData,
            demographics=demogrData, drugs=viabData, pretreated=pretreated, seahorse = sea)
cleanData <- generateData(inclSet, censor = 4)

Function for multi-variate regression

runGlm <- function(X, y, method = "ridge", repeats=20, folds = 3) {
  modelList <- list()
  lambdaList <- c()
  varExplain <- c()
  coefMat <- matrix(NA, ncol(X), repeats)
  rownames(coefMat) <- colnames(X)

  if (method == "lasso"){
    alpha = 1
  } else if (method == "ridge") {
    alpha = 0
  }
  
  for (i in seq(repeats)) {
    if (ncol(X) > 2) {
      res <- cv.glmnet(X,y, type.measure = "mse", family="gaussian", 
                       nfolds = folds, alpha = alpha, standardize = FALSE)
      lambdaList <- c(lambdaList, res$lambda.min)
      modelList[[i]] <- res
      
      coefModel <- coef(res, s = "lambda.min")[-1] #remove intercept row
      coefMat[,i] <- coefModel
      
      #calculate variance explained
      y.pred <- predict(res, s = "lambda.min", newx = X)
      varExp <- cor(as.vector(y),as.vector(y.pred))^2
      varExplain[i] <- ifelse(is.na(varExp), 0, varExp) 
      
    } else {
      fitlm<-lm(y~., data.frame(X))
      varExp <- summary(fitlm)$r.squared
      varExplain <- c(varExplain, varExp)
      
    }

  }
  list(modelList = modelList, lambdaList = lambdaList, varExplain = varExplain, coefMat = coefMat)
}

Perform lasso regression

lassoResults <- list()
for (eachMeasure in names(cleanData$allResponse)) {
  dataResult <- list()
  for (eachDataset in names(cleanData$allExplain[[eachMeasure]])) {
    y <- cleanData$allResponse[[eachMeasure]]
    X <- cleanData$allExplain[[eachMeasure]][[eachDataset]]
  
   
    glmRes <- runGlm(X, y, method = "lasso", repeats = 100, folds = 3)
    dataResult[[eachDataset]] <- glmRes 
  }
  lassoResults[[eachMeasure]] <- dataResult
  
}

7.1.2.2 Ploting results

Function for plotting variance explained for each measurement

Show the plot

grid.arrange(grobs = varList, ncol = 4)

7.1.3 Using LASSO model to select important features

7.1.3.1 Training models

Prepare clean data for feature selection

inclSet<-list(RNA=rnaPCA, gen=genData, IGHV=IGHVData, 
              drugs=viabData, seahorse = sea)
cleanData <- generateData(inclSet, onlyCombine = TRUE, censor = 4)

Perform lasso regression

lassoResults <- list()
for (eachMeasure in names(cleanData$allResponse)) {
  dataResult <- list()
  for (eachDataset in names(cleanData$allExplain[[eachMeasure]])) {
    y <- cleanData$allResponse[[eachMeasure]]
    X <- cleanData$allExplain[[eachMeasure]][[eachDataset]]
  
   
    glmRes <- runGlm(X, y, method = "lasso", repeats = 100, folds = 3)
    dataResult[[eachDataset]] <- glmRes 
  }
  lassoResults[[eachMeasure]] <- dataResult
  
}

7.1.4 Ploting results

Function for the heatmap plot

lassoPlot <- function(lassoOut, cleanData, freqCut = 1, coefCut = 0.01, setNumber = "last") {
  plotList <- list()
  if (setNumber == "last") {
    setNumber <- length(lassoOut[[1]])
  } else {
    setNumber <- setNumber
  }
  for (seaName in names(lassoOut)) {
    #for the barplot on the left of the heatmap
    barValue <- rowMeans(lassoOut[[seaName]][[setNumber]]$coefMat)
    freqValue <- rowMeans(abs(sign(lassoOut[[seaName]][[setNumber]]$coefMat)))
    barValue <- barValue[abs(barValue) >= coefCut & freqValue >= freqCut] # a certain threshold
    barValue <- barValue[order(barValue)]
    if(length(barValue) == 0) {
      plotList[[seaName]] <- NA
      next
    }

    #for the heatmap and scatter plot below the heatmap
    allData <- cleanData$allExplain[[seaName]][[setNumber]]
    seaValue <- cleanData$allResponse[[seaName]]*2 #back to Z-score
    
    tabValue <- allData[, names(barValue),drop=FALSE]
    ord <- order(seaValue)
    seaValue <- seaValue[ord]
    tabValue <- tabValue[ord, ,drop=FALSE]
    sampleIDs <- rownames(tabValue)
    tabValue <- as.tibble(tabValue)
    
    #change scaled binary back to catagorical
    for (eachCol in colnames(tabValue)) {
      if (strsplit(eachCol, split = "[.]")[[1]][1] != "con") {
        tabValue[[eachCol]] <- as.integer(as.factor(tabValue[[eachCol]]))
      }
      else {
        tabValue[[eachCol]] <- tabValue[[eachCol]]*2 #back to Z-score
      }
    }
    
    tabValue$Sample <- sampleIDs
    #Mark different rows for different scaling in heatmap
    matValue <- gather(tabValue, key = "Var",value = "Value", -Sample)
    matValue$Type <- "mut"
    
    #For continuious value
    matValue$Type[grep("con.",matValue$Var)] <- "con"
    
    #for methylation_cluster
    matValue$Type[grep("ConsCluster",matValue$Var)] <- "meth"
    
    #change the scale of the value, let them do not overlap with each other
    matValue[matValue$Type == "mut",]$Value = matValue[matValue$Type == "mut",]$Value + 10
    matValue[matValue$Type == "meth",]$Value = matValue[matValue$Type == "meth",]$Value + 20
    
    
    #color scale for viability
    idx <- matValue$Type == "con"
    
    myCol <- colorRampPalette(c('dark red','white','dark blue'), 
                   space = "Lab")
    if (sum(idx) != 0) {
      matValue[idx,]$Value = round(matValue[idx,]$Value,digits = 2)
      minViab <- min(matValue[idx,]$Value)
      maxViab <- max(matValue[idx,]$Value)
      limViab <- max(c(abs(minViab), abs(maxViab)))
      scaleSeq1 <- round(seq(-limViab, limViab,0.01), digits=2)
      color4viab <- setNames(myCol(length(scaleSeq1+1)), nm=scaleSeq1)
    } else {
      scaleSeq1 <- round(seq(0,1,0.01), digits=2)
      color4viab <- setNames(myCol(length(scaleSeq1+1)), nm=scaleSeq1)
    }
    

    
    #change continues measurement to discrete measurement
    matValue$Value <- factor(matValue$Value,levels = sort(unique(matValue$Value)))
    
    #change order of heatmap
    names(barValue) <-  gsub("con.", "", names(barValue))
    matValue$Var <- gsub("con.","",matValue$Var)
    matValue$Var <- factor(matValue$Var, levels = names(barValue))
    matValue$Sample <- factor(matValue$Sample, levels = names(seaValue))
    #plot the heatmap
    p1 <- ggplot(matValue, aes(x=Sample, y=Var)) + geom_tile(aes(fill=Value), color = "gray") + 
      theme_bw() + scale_y_discrete(expand=c(0,0)) + theme(axis.text.y=element_text(hjust=0, size=10), axis.text.x=element_blank(), axis.ticks=element_blank(), panel.border=element_rect(colour="gainsboro"),  plot.title=element_text(size=12), panel.background=element_blank(), panel.grid.major=element_blank(), panel.grid.minor=element_blank()) + xlab("patients") + ylab("") + scale_fill_manual(name="Mutated", values=c(color4viab, `11`="gray96", `12`='black', `21`='lightgreen', `22`='green',`23` = 'green4'),guide=FALSE) + ggtitle(seaName)
    
    #Plot the bar plot on the left of the heatmap
    barDF = data.frame(barValue, nm=factor(names(barValue),levels=names(barValue)))
    
    p2 <- ggplot(data=barDF, aes(x=nm, y=barValue)) + 
      geom_bar(stat="identity", fill="lightblue", colour="black", position = "identity", width=.66, size=0.2) + 
      theme_bw() + geom_hline(yintercept=0, size=0.3) + scale_x_discrete(expand=c(0,0.5)) + 
      scale_y_continuous(expand=c(0,0)) + coord_flip(ylim=c(min(barValue),max(barValue))) + 
      theme(panel.grid.major=element_blank(), panel.background=element_blank(), axis.ticks.y = element_blank(),
            panel.grid.minor = element_blank(), axis.text=element_text(size=8), panel.border=element_blank()) +
      xlab("") + ylab("") + geom_vline(xintercept=c(0.5), color="black", size=0.6)
    
    #Plot the scatter plot under the heatmap
    
    # scatterplot below
    scatterDF = data.frame(X=factor(names(seaValue), levels=names(seaValue)), Y=seaValue)
    
    p3 <- ggplot(scatterDF, aes(x=X, y=Y)) + geom_point(shape=21, fill="dimgrey", colour="black", size=1.2) + theme_bw() + theme(panel.grid.minor=element_blank(), panel.grid.major.x=element_blank(), axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank(), axis.text.y=element_text(size=8), panel.border=element_rect(colour="dimgrey", size=0.1), panel.background=element_rect(fill="gray96"))
    
    #Scale bar for continuous variable

    Vgg = ggplot(data=data.frame(x=1, y=as.numeric(names(color4viab))), aes(x=x, y=y, color=y)) + geom_point() + 
      scale_color_gradientn(name="Z-score", colours =color4viab) + theme(legend.title=element_text(size=12), legend.text=element_text(size=10))
    
    #Assemble all the plots togehter

    # construct the gtable
    wdths = c(1.5, 0.2, 1.3*ncol(matValue), 1.4, 1)
    hghts = c(0.1, 0.0015*nrow(matValue), 0.16, 0.5)
    gt = gtable(widths=unit(wdths, "in"), heights=unit(hghts, "in"))
    
    ## make grobs
    gg1 = ggplotGrob(p1)
    gg2 = ggplotGrob(p2)
    gg3 = ggplotGrob(p3)
    gg4 = ggplotGrob(Vgg)
    
    ## fill in the gtable
    gt = gtable_add_grob(gt, gtable_filter(gg2, "panel"), 2, 1) # add barplot
    gt = gtable_add_grob(gt, gtable_filter(gg1, "panel"), 2, 3) # add heatmap
    gt = gtable_add_grob(gt, gtable_filter(gg1, "title"), 1, 3) #add title to plot
    gt = gtable_add_grob(gt, gtable_filter(gg3, "panel"), 4, 3) # add scatterplot
    gt = gtable_add_grob(gt, gtable_filter(gg2, "axis-b"), 3, 1) # y axis for barplot
    gt = gtable_add_grob(gt, gtable_filter(gg3, "axis-l"), 4, 2) # y axis for scatter plot
    gt = gtable_add_grob(gt, gtable_filter(gg1, "axis-l"), 2, 4) # variable names
    gt = gtable_add_grob(gt, gtable_filter(gg4, "guide-box"), 2, 5) # scale bar for continous variables

    
    #plot
    #grid.draw(gt)
    plotList[[seaName]] <- gt
  }
  return(plotList)
}

Plot all heatmaps

heatMaps <- lassoPlot(lassoResults, cleanData, freqCut = 0.8)

Arrange for the main figure (Figure 7)

ggdraw() + 
  draw_plot(varList[[1]], 0,0.6,0.25,0.4) + 
  draw_plot(varList[[4]], 0.25,0.6,0.25,0.4) + 
  draw_plot(varList[[6]], 0.5,0.6,0.25,0.4) + 
  draw_plot(varList[[7]], 0.75,0.6,0.25,0.4) +
  draw_plot(heatMaps[[1]], 0, 0, 1, 0.3 ) +
  draw_plot(heatMaps[[6]], 0, 0.3, 1, 0.3 ) + 
  draw_plot_label(c("A","B"), c(0,0), c(1, 0.6),size=15)

Plot other feature for supplementary file

allList <- list(varList[[2]], heatMaps[[2]],
                varList[[4]], heatMaps[[4]],
                varList[[5]], heatMaps[[5]],
                varList[[7]], heatMaps[[7]],
                varList[[8]], heatMaps[[8]],
                varList[[9]], heatMaps[[9]],
                varList[[11]], heatMaps[[11]])

plot_grid(plotlist = allList, rel_widths = c(1,4,1,4),ncol = 4)                

7.2 Charactersing bioligical meanings of expression PCs

Prepare signature sets

gmts = list(H=system.file("extdata","h.all.v5.1.symbols.gmt", package="BloodCancerMultiOmics2017"))

Enrichment using HALLMARK

plotPC <- colnames(pcLoad)[c(2,4, 6, 8,10,11)]
enHallmark <- lapply(plotPC, function(pc) {
  inputTab <- data.frame(ID = rowData(dds[rownames(pcLoad),])$symbol,
                         stat = pcLoad[,pc]) %>%
    arrange(abs(stat)) %>% distinct(ID, .keep_all = TRUE) %>%
    column_to_rownames("ID")
  res <- runGSEA(inputTab = inputTab, gmtFile = gmts$H, GSAmethod = "page")
  res$Name <- gsub("HALLMARK_","", res$Name)
  res
}) 
## Checking arguments...done!
## Calculating gene set statistics...done!
## Calculating gene set significance...done!
## Adjusting for multiple testing...done!
## Checking arguments...done!
## Calculating gene set statistics...done!
## Calculating gene set significance...done!
## Adjusting for multiple testing...done!
## Checking arguments...done!
## Calculating gene set statistics...done!
## Calculating gene set significance...done!
## Adjusting for multiple testing...done!
## Checking arguments...done!
## Calculating gene set statistics...done!
## Calculating gene set significance...done!
## Adjusting for multiple testing...done!
## Checking arguments...done!
## Calculating gene set statistics...done!
## Calculating gene set significance...done!
## Adjusting for multiple testing...done!
## Checking arguments...done!
## Calculating gene set statistics...done!
## Calculating gene set significance...done!
## Adjusting for multiple testing...done!
names(enHallmark) <- sapply(plotPC, 
                            function(x) paste("expression",x, collapse = ""))
plotHallmark1 <- jyluMisc::plotEnrichmentBar(enHallmark[1:3], pCut = 0.05, setName = "", ifFDR = TRUE)
plotHallmark2 <- jyluMisc::plotEnrichmentBar(enHallmark[4:6], pCut = 0.05, setName = "", ifFDR = TRUE)

#save to pdf manually
ggdraw() +
  draw_plot(plotHallmark1, 0,0,0.5,1) +
  draw_plot(plotHallmark2, 0.5,0.4,0.5,0.6)


8 End of session

sessionInfo()
## R version 3.5.0 (2018-04-23)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.4
## 
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
##  [1] grid      parallel  stats4    stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] bindrcpp_0.2.2                   forcats_0.3.0                   
##  [3] stringr_1.3.0                    dplyr_0.7.4                     
##  [5] purrr_0.2.4                      readr_1.1.1                     
##  [7] tidyr_0.8.0                      tibble_1.4.2                    
##  [9] tidyverse_1.2.1                  gtable_0.2.0                    
## [11] reshape2_1.4.3                   RColorBrewer_1.1-2              
## [13] glmnet_2.0-16                    foreach_1.4.4                   
## [15] Matrix_1.2-14                    survminer_0.4.2                 
## [17] ggpubr_0.1.6                     magrittr_1.5                    
## [19] maxstat_0.7-25                   survival_2.42-3                 
## [21] DESeq2_1.20.0                    pheatmap_1.0.8                  
## [23] genefilter_1.62.0                gridExtra_2.3                   
## [25] piano_1.20.0                     cowplot_0.9.2                   
## [27] xtable_1.8-2                     ggbeeswarm_0.6.0                
## [29] ggplot2_2.2.1                    SummarizedExperiment_1.10.0     
## [31] DelayedArray_0.6.0               BiocParallel_1.14.0             
## [33] matrixStats_0.53.1               Biobase_2.40.0                  
## [35] GenomicRanges_1.32.0             GenomeInfoDb_1.16.0             
## [37] IRanges_2.14.1                   S4Vectors_0.18.1                
## [39] BiocGenerics_0.26.0              BloodCancerMultiOmics2017_0.99.1
## [41] seahorseCLL_0.1.0                BiocStyle_2.8.0                 
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.1.0           backports_1.1.2        drc_3.0-1             
##   [4] jyluMisc_0.1.5         Hmisc_4.1-1            fastmatch_1.1-0       
##   [7] plyr_1.8.4             igraph_1.2.1           lazyeval_0.2.1        
##  [10] splines_3.5.0          TH.data_1.0-8          digest_0.6.15         
##  [13] htmltools_0.3.6        gdata_2.18.0           checkmate_1.8.5       
##  [16] memoise_1.1.0          cluster_2.0.7-1        openxlsx_4.0.17       
##  [19] limma_3.36.0           annotate_1.58.0        modelr_0.1.1          
##  [22] sandwich_2.4-0         colorspace_1.3-2       blob_1.1.1            
##  [25] rvest_0.3.2            haven_1.1.1            xfun_0.1              
##  [28] crayon_1.3.4           RCurl_1.95-4.10        jsonlite_1.5          
##  [31] bindr_0.1.1            zoo_1.8-1              iterators_1.0.9       
##  [34] glue_1.2.0             zlibbioc_1.26.0        XVector_0.20.0        
##  [37] car_3.0-0              abind_1.4-5            scales_0.5.0.9000     
##  [40] mvtnorm_1.0-7          DBI_1.0.0              relations_0.6-8       
##  [43] Rcpp_0.12.16           plotrix_3.7            cmprsk_2.2-7          
##  [46] htmlTable_1.11.2       foreign_0.8-70         bit_1.1-12            
##  [49] km.ci_0.5-2            ipflasso_0.1           Formula_1.2-3         
##  [52] htmlwidgets_1.2        httr_1.3.1             fgsea_1.6.0           
##  [55] gplots_3.0.1           acepack_1.4.1          pkgconfig_2.0.1       
##  [58] XML_3.98-1.11          nnet_7.3-12            locfit_1.5-9.1        
##  [61] tidyselect_0.2.4       labeling_0.3           rlang_0.2.0.9001      
##  [64] AnnotationDbi_1.42.0   munsell_0.4.3          cellranger_1.1.0      
##  [67] tools_3.5.0            cli_1.0.0              RSQLite_2.1.0         
##  [70] devtools_1.13.5        broom_0.4.4            evaluate_0.10.1       
##  [73] ggdendro_0.1-20        yaml_2.1.19            knitr_1.20            
##  [76] bit64_0.9-7            survMisc_0.5.4         caTools_1.17.1        
##  [79] nlme_3.1-137           slam_0.1-43            xml2_1.2.0            
##  [82] compiler_3.5.0         rstudioapi_0.7         curl_3.2              
##  [85] beeswarm_0.2.3         marray_1.58.0          geneplotter_1.58.0    
##  [88] stringi_1.2.2          lattice_0.20-35        psych_1.8.3.3         
##  [91] KMsurv_0.1-5           pillar_1.2.2           data.table_1.10.4-3   
##  [94] bitops_1.0-6           R6_2.2.2               latticeExtra_0.6-28   
##  [97] bookdown_0.7           rio_0.5.10             KernSmooth_2.23-15    
## [100] vipor_0.4.5            codetools_0.2-15       MASS_7.3-50           
## [103] gtools_3.5.0           exactRankTests_0.8-29  assertthat_0.2.0      
## [106] rprojroot_1.3-2        withr_2.1.2            mnormt_1.5-5          
## [109] multcomp_1.4-8         GenomeInfoDbData_1.1.0 hms_0.4.2             
## [112] rpart_4.1-13           rmarkdown_1.9          carData_3.0-1         
## [115] sets_1.0-18            lubridate_1.7.4        base64enc_0.1-3